|
import os
import smtplib
import urllib2
from email.MIMEText import MIMEText

from bs4 import BeautifulSoup
| 5 | + |
# Gmail account used to log in to the SMTP server and as the default
# sender/recipient of send_email(). The values are read from the environment
# when the variables are set, so real credentials never need to be committed;
# the literals below are only fallbacks.
GMAIL_LOGIN = os.environ.get('GMAIL_LOGIN', 'pyladiestest@gmail.com')
GMAIL_PASSWORD = os.environ.get('GMAIL_PASSWORD', 'YOU NO CAN HAZ')
| 8 | + |
| 9 | + |
def send_email(subject, message, from_addr=GMAIL_LOGIN, to_addr=GMAIL_LOGIN):
    """Send a plain-text email through Gmail's SMTP server.

    The connection is authenticated with the module-level GMAIL_LOGIN and
    GMAIL_PASSWORD, regardless of the ``from_addr`` given.

    Args:
        subject: Text for the Subject header.
        message: Body of the email, as UTF-8 encoded text.
        from_addr: Sender address; defaults to GMAIL_LOGIN.
        to_addr: Recipient address; defaults to GMAIL_LOGIN.

    Any exception raised while talking to the server (failed login, refused
    recipient, network error) propagates to the caller; the connection is
    closed either way.
    """
    # Declare the charset explicitly: the default is us-ascii, which would
    # mislabel a UTF-8 encoded body containing non-ASCII characters.
    msg = MIMEText(message, 'plain', 'utf-8')
    msg['Subject'] = subject
    msg['From'] = from_addr
    msg['To'] = to_addr
    msg['Reply-To'] = 'happyhours@noreply.com'

    # 587 is the STARTTLS submission port; 465 would need smtplib.SMTP_SSL.
    server = smtplib.SMTP('smtp.gmail.com', 587)
    try:
        server.ehlo()
        server.starttls()
        server.ehlo()  # identify again over the now-encrypted channel
        server.login(GMAIL_LOGIN, GMAIL_PASSWORD)
        server.sendmail(from_addr, to_addr, msg.as_string())
    finally:
        # Always release the socket, even when login or sending fails.
        server.close()
| 24 | + |
| 25 | + |
def get_site_html(url):
    """Fetch ``url`` and return the raw body of the response.

    Errors raised by ``urllib2.urlopen`` are not caught here and propagate
    to the caller.
    """
    response = urllib2.urlopen(url)
    try:
        return response.read()
    finally:
        # Close the connection explicitly instead of leaving it to the
        # garbage collector.
        response.close()
| 29 | + |
| 30 | + |
def get_tree(url):
    """Download the page at ``url`` and return it parsed by BeautifulSoup."""
    return BeautifulSoup(get_site_html(url))
| 35 | + |
| 36 | + |
if __name__ == '__main__':

    stuff_i_like = ['burger', 'wine', 'sushi', 'sweet potato fries', 'BBQ']
    found_happy_hours = []
    my_happy_hours = []

    # First, I'm going to identify the areas of the page I want to look at
    tables = get_tree(
        'http://www.downtownla.com/3_10_happyHours.asp?action=ALL')

    # Then, I'm going to sort out the *exact* parts of the page that match
    # what I'm looking for: every <p class="calendar_EventTitle"> element,
    # followed by the text of each of its next siblings, one per line.
    for t in tables.findAll('p', {'class': 'calendar_EventTitle'}):
        parts = [t.text]
        parts.extend(s.text for s in t.findNextSiblings())
        found_happy_hours.append('\n'.join(parts))

    print("The scraper found %d happy hours!" % len(found_happy_hours))

    # Now I'm going to loop through the food I like
    # and see if any of the happy hour descriptions match
    # (the substring check is case-sensitive).
    for food in stuff_i_like:
        for hh in found_happy_hours:
            # checking for text AND making sure I don't have duplicates
            if food in hh and hh not in my_happy_hours:
                print("YAY! I found some %s!" % food)
                my_happy_hours.append(hh)

    print("I think you might like %d of them, yipeeeee!" % len(my_happy_hours))

    if my_happy_hours:
        # Now, let's make a mail message we can read:
        message = 'Hey Katharine,\n\n\n'
        message += 'OMG, I found some stuff for you in Downtown, take a look.\n\n'
        message += '==============================\n'.join(my_happy_hours)
        message = message.encode('utf-8')
        # To read more about encoding:
        # http://diveintopython.org/xml_processing/unicode.html
        message = message.replace('\t', '').replace('\r', '')
        message += '\n\nXOXO,\n Your Py Script'

        # And email it to ourselves!
        email = 'katharine@pyladies.com'
        send_email('Happy Hour Update', message, from_addr=GMAIL_LOGIN,
                   to_addr=email)
    else:
        # Nothing matched, so an "I found some stuff" email would be
        # misleading; skip sending it.
        print("Nothing matched this time, so no email was sent.")
0 commit comments