Skip to content

Commit b0f1abf

Browse files
Chirag authored and committed
Added script to get movie data from imdb.com
1 parent c11b1c0 commit b0f1abf

File tree

2 files changed

+92
-0
lines changed

2 files changed

+92
-0
lines changed

imdb-scraper/SS1.PNG

28.4 KB
Loading

imdb-scraper/imdbscraper.py

+92
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,92 @@
1+
from bs4 import BeautifulSoup
2+
import urllib.request as req
3+
from tabulate import tabulate
4+
5+
def getResponse(url):
    """Download *url* and return its body parsed as a BeautifulSoup tree.

    Args:
        url: Address of the page to fetch.

    Returns:
        A ``BeautifulSoup`` object built from the raw response bytes using
        the "lxml" parser.

    Raises:
        urllib.error.URLError: Propagated from ``urlopen`` when the request
            cannot be completed.
    """
    # The context manager closes the HTTP response even if read() raises;
    # previously the response object was never closed.
    with req.urlopen(url) as response:
        data = response.read()
    return BeautifulSoup(data, "lxml")
11+
12+
def selectChoice():
    """Print the chart menu and return the URL slug of the chart picked.

    Keeps prompting until the user enters a whole number that matches one
    of the menu entries. Non-numeric input is treated like any other wrong
    choice instead of aborting the program with ``ValueError``.

    Returns:
        The second element of the selected ``options_map`` entry, e.g.
        ``'top'`` or ``'boxoffice'``.
    """
    # menu number -> (label shown to the user, chart slug)
    options_map = {
        1: ('Top movies', 'top'),
        2: ('Most Popular Movies', 'moviemeter'),
        3: ('Top English Movies', 'top-english-movies'),
        4: ('Top TV Shows', 'toptv'),
        5: ('Most Popular TV Shows', 'tvmeter'),
        6: ('Low Rated Movies', 'bottom'),
        7: ('Top Box Office collection', 'boxoffice'),
    }

    # Print the real dictionary key so the number shown is always the one
    # that is accepted below.
    for number, (label, _slug) in options_map.items():
        print("{}) {}".format(number, label))

    while True:
        raw = input('\nChoice please..\n')
        try:
            choice = int(raw)
        except ValueError:
            choice = None  # not a number: fall through to the retry message
        if choice in options_map:
            return options_map[choice][1]
        print('Wrong choice, enter again..')
42+
43+
def _strip_rank_prefix(title):
    """Remove a leading chart position such as ``"12. "`` from *title*."""
    import re  # local import so this block does not depend on file-level imports

    # Digits are only removed when followed by a dot, so titles that start
    # with a number ("12 Angry Men") survive. The old lstrip() on a digit
    # set also ate those. Assumes positions are rendered as "N." - TODO
    # confirm against the live page markup.
    return re.sub(r"^[\s.\-]*(?:\d+\s*\.[\s.\-]*)?", "", title, count=1)


def getData(base_url, option):
    """Fetch one chart page and print its entries as a grid table.

    Args:
        base_url: Common URL prefix of the chart pages.
        option: Chart slug appended to ``base_url``.

    Returns:
        None. The table (columns "Index", "Name", "Ratings") is printed to
        stdout.
    """
    complete_url = base_url + option
    soup = getResponse(complete_url)
    # Each entry is selected by this span class (material card list).
    card_list = soup.find_all('span', {'class': 'media-body media-vertical-align'})

    result = []
    for count, card in enumerate(card_list, 1):
        title_tag = card.find('h4')
        rating_tag = card.find('p')

        # A missing tag gets an explicit placeholder. Previously a bare
        # "except: pass" left the variable unbound (NameError on the first
        # card) or silently repeated the previous card's value.
        if title_tag is not None:
            name = _strip_rank_prefix(title_tag.text.replace("\n", " "))
        else:
            name = "N/A"

        if rating_tag is not None:
            rating = rating_tag.text.strip()
        else:
            rating = "N/A"

        result.append([count, name, rating])

    print(tabulate(result, headers=["Index", "Name", "Ratings"], tablefmt="grid"))
61+
62+
def main():
    """Ask the user which chart to show, then fetch and print it."""
    chart_root = "http://m.imdb.com/chart/"
    selected_slug = selectChoice()
    getData(chart_root, selected_slug)
67+
68+
# Run the interactive scraper only when executed as a script, not on import.
if __name__ == "__main__":
    main()
70+
71+
72+
'''
Table formats that can be passed as tabulate's tablefmt argument:
- "plain"
- "simple"
- "grid"
- "fancy_grid"
- "pipe"
- "orgtbl"
- "jira"
- "presto"
- "psql"
- "rst"
- "mediawiki"
- "moinmoin"
- "youtrack"
- "html"
- "latex"
- "latex_raw"
- "latex_booktabs"
- "textile"
'''

0 commit comments

Comments
 (0)