Skip to content

Commit af80de1

Browse files
Added Google news scraper
1 parent df532bd commit af80de1

File tree

1 file changed

+24
-0
lines changed

1 file changed

+24
-0
lines changed

gnews-scraper/gnews.py

+24
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
import urllib2
2+
from xml.dom.minidom import parseString
3+
4+
def get_google_new_results( term, count ):
    """Fetch Google News RSS results for a search term.

    Args:
        term: Free-text search query. It is URL-encoded before being placed
            in the request, so spaces and characters such as ``&`` or ``#``
            are safe to pass.
        count: Maximum number of results to return (applied as a slice over
            the ``<item>`` elements of the feed).

    Returns:
        A list of ``(title, link)`` tuples, one per feed item, in feed order.
        A missing or empty ``<title>``/``<link>`` yields an empty string for
        that field.

    Raises:
        Whatever ``urllib2.urlopen`` raises when the request fails, and
        whatever ``parseString`` raises when the response is not
        well-formed XML.
    """
    # Function-scope import so this block does not depend on edits to the
    # file's import section; in Python 2 quote_plus lives in urllib.
    import urllib

    def _text_of(node):
        # Join every direct text/CDATA child: an element may be empty (no
        # children at all) or have its text split across several nodes.
        return ''.join(
            child.data
            for child in node.childNodes
            if child.nodeType in (child.TEXT_NODE, child.CDATA_SECTION_NODE)
        )

    url = 'http://news.google.com/news?q=%s&output=rss' % urllib.quote_plus(term)

    # Always release the connection, even if read() fails part-way.
    response = urllib2.urlopen(url)
    try:
        payload = response.read()
    finally:
        response.close()

    results = []
    items = parseString(payload).getElementsByTagName('item')  # one per story
    for item in items[:count]:
        t, l = '', ''
        for node in item.childNodes:
            if node.nodeName == 'title':
                t = _text_of(node)
            elif node.nodeName == 'link':
                l = _text_of(node)
        results.append((t, l))
    return results
17+
18+
19+
print "Enter term to scrape from"
20+
x = str(raw_input(""))
21+
22+
items = get_google_new_results( x, 50 )
23+
for title,link in items:
24+
print title, ' ', link, '\n'

0 commit comments

Comments
 (0)