
Commit 8098e23 (initial commit, 0 parents)

    initial set of files for pycon 2014 :)

16 files changed, 1142 insertions(+), 0 deletions(-)

.gitignore

Lines changed: 2 additions & 0 deletions
*~
*.pyc

README.md

Lines changed: 43 additions & 0 deletions
PyCon Introduction to Web and Data Scraping Tutorial
====================================================

A tutorial-based introduction to web scraping with Python.

Virtual Env
-----------

If you'd like to use virtual environments, follow the instructions below. This is not required for the tutorial, but it may be helpful.

For more details, see the [virtualenvwrapper documentation](http://www.doughellmann.com/projects/virtualenvwrapper/).

If you don't have virtualenvwrapper and/or pip:

    $ easy_install pip
    $ pip install virtualenvwrapper

and read the additional installation instructions [here](http://virtualenvwrapper.readthedocs.org/en/latest/install.html). Then:

    $ mkvirtualenv scraper_tutorial
    $ pip install -r requirements.txt


LXML and Selenium
-----------------

You will need both [LXML](http://lxml.de/) and [Selenium](http://selenium-python.readthedocs.org/en/latest/index.html) to follow this tutorial in its entirety.

If you are using a Mac, I would highly recommend [Homebrew](http://brew.sh/). It will make installing these libraries with pip *very easy*.

* [More help on installing LXML on Mac](http://lxml.de/installation.html#installation)
* [Additional suggestions for LXML on Mac](http://stackoverflow.com/questions/1277124/how-do-you-install-lxml-on-os-x-leopard-without-using-macports-or-fink)

If you are using Windows, it might be worth running this within a Linux virtual machine. If you are a Windows + Python guru, follow these installation instructions; I can help as needed, but I have not programmed on Windows in more than 5 years.

* [Installing Selenium on Windows](http://selenium-python.readthedocs.org/en/latest/installation.html#detailed-instructions-for-windows-users)
* [Installing LXML on Windows](http://lxml.de/installation.html#ms-windows)
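Once both libraries are installed, a quick smoke test along these lines should run cleanly (a minimal sketch, not one of the tutorial files; the URL and the Firefox driver are placeholder choices):

    import lxml.html
    from selenium import webdriver

    # lxml: parse a small HTML string and read the heading text back out.
    tree = lxml.html.fromstring('<html><body><h1>hello</h1></body></html>')
    print tree.findtext('.//h1')  # hello

    # Selenium: drive a real browser, load a page, and print its title.
    browser = webdriver.Firefox()  # assumes Firefox is installed
    browser.get('http://www.python.org')
    print browser.title
    browser.quit()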
Please reach out to me if you have any questions about getting the initial requirements set up. Thanks!

Questions?
----------

/msg kjam on freenode or @kjam on twitter

bs_scraper.py

Lines changed: 81 additions & 0 deletions
import urllib2
import smtplib

from email.mime.text import MIMEText  # modern path; email.MIMEText is deprecated
from bs4 import BeautifulSoup

GMAIL_LOGIN = 'pyladiestest@gmail.com'
GMAIL_PASSWORD = 'YOU NO CAN HAZ'


def send_email(subject, message, from_addr=GMAIL_LOGIN, to_addr=GMAIL_LOGIN):
    """Send a plain-text email through Gmail's SMTP server."""
    msg = MIMEText(message)
    msg['Subject'] = subject
    msg['From'] = from_addr
    msg['To'] = to_addr
    msg['Reply-To'] = 'happyhours@noreply.com'

    server = smtplib.SMTP('smtp.gmail.com', 587)  # port 465 or 587
    server.ehlo()
    server.starttls()
    server.ehlo()
    server.login(GMAIL_LOGIN, GMAIL_PASSWORD)
    server.sendmail(from_addr, to_addr, msg.as_string())
    server.close()


def get_site_html(url):
    """Return the raw HTML source of the page at url."""
    source = urllib2.urlopen(url).read()
    return source


def get_tree(url):
    """Fetch a page and parse it into a BeautifulSoup tree."""
    source = get_site_html(url)
    tree = BeautifulSoup(source)
    return tree


if __name__ == '__main__':

    stuff_i_like = ['burger', 'wine', 'sushi', 'sweet potato fries', 'BBQ']
    found_happy_hours = []
    my_happy_hours = []

    # First, I'm going to identify the areas of the page I want to look at
    tables = get_tree(
        'http://www.downtownla.com/3_10_happyHours.asp?action=ALL')

    # Then, I'm going to sort out the *exact* parts of the page
    # that match what I'm looking for...
    for t in tables.findAll('p', {'class': 'calendar_EventTitle'}):
        text = t.text
        for s in t.findNextSiblings():
            text += '\n' + s.text
        found_happy_hours.append(text)

    print "The scraper found %d happy hours!" % len(found_happy_hours)

    # Now I'm going to loop through the food I like
    # and see if any of the happy hour descriptions match
    for food in stuff_i_like:
        for hh in found_happy_hours:
            # checking for text AND making sure I don't have duplicates
            if food in hh and hh not in my_happy_hours:
                print "YAY! I found some %s!" % food
                my_happy_hours.append(hh)

    print "I think you might like %d of them, yipeeeee!" % len(my_happy_hours)

    # Now, let's make a mail message we can read:
    message = 'Hey Katharine,\n\n\n'
    message += 'OMG, I found some stuff for you in Downtown, take a look.\n\n'
    message += '==============================\n'.join(my_happy_hours)
    message = message.encode('utf-8')
    # To read more about encoding:
    # http://diveintopython.org/xml_processing/unicode.html
    message = message.replace('\t', '').replace('\r', '')
    message += '\n\nXOXO,\n Your Py Script'

    # And email it to ourselves!
    email = 'katharine@pyladies.com'
    send_email('Happy Hour Update', message, from_addr=GMAIL_LOGIN,
               to_addr=email)
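The selection pattern above (find each event title, then fold in the sibling tags that follow it) is easier to see on a tiny snippet. A minimal sketch, not part of the commit: the HTML is made up, and 'p' is passed to findNextSiblings explicitly so the demo only walks tags:

    from bs4 import BeautifulSoup

    snippet = '''
    <p class="calendar_EventTitle">Taco Night</p>
    <p>Half-price margaritas</p>
    <p>5pm to 7pm</p>
    '''
    soup = BeautifulSoup(snippet)
    # Each title starts a record; the following sibling tags are its details.
    for title in soup.findAll('p', {'class': 'calendar_EventTitle'}):
        text = title.text
        for sib in title.findNextSiblings('p'):
            text += '\n' + sib.text
        print text
    # prints:
    # Taco Night
    # Half-price margaritas
    # 5pm to 7pm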

csv_scraper.py

Lines changed: 10 additions & 0 deletions
from csv import DictReader
from datetime import datetime

with open('/home/katharine/Downloads/schedule.csv') as document:
    reader = DictReader(document)
    for row in reader:
        day = datetime.strptime(row.get('START_DATE'), '%m/%d/%y')
        # weekday() numbers Monday as 0, so > 4 means Saturday or Sunday
        if 'PNC' in row.get('LOCATION') and day.weekday() > 4:
            print 'HOME WEEKEND GAME!! %s on %s' % (
                row.get('SUBJECT'), row.get('START_DATE'))
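schedule.csv itself isn't in the commit, but from the columns the script reads (SUBJECT, START_DATE, LOCATION), it presumably looks something like this; these rows are hypothetical:

    SUBJECT,START_DATE,LOCATION
    Pirates vs. Reds,06/21/14,PNC Park
    Pirates vs. Mets,05/27/14,Citi Field

Only the first row would print: its LOCATION contains 'PNC' and 06/21/14 falls on a Saturday.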

data/.~lock.crunchbase.xlsx#

Lines changed: 1 addition & 0 deletions
katharine ,katharine,kjamistan,03.04.2014 11:59,file:///home/katharine/.config/libreoffice/4;

data/crunchbase.xlsx

30.4 MB
Binary file not shown.
