1
+ from bs4 import BeautifulSoup
2
+ import urllib .request as req
3
+ from tabulate import tabulate
4
+
5
def getResponse(url):
    """Fetch *url* and return its body parsed as a BeautifulSoup tree.

    Args:
        url: Address handed unchanged to ``urllib.request.urlopen``.

    Returns:
        A ``BeautifulSoup`` object built from the raw response bytes using
        the "lxml" parser.

    Raises:
        urllib.error.URLError: Propagated from ``urlopen`` when the request
            cannot be completed.
    """
    # The context manager closes the response (and its underlying socket)
    # even if read() fails; previously the response was never closed.
    with req.urlopen(url) as response:
        data = response.read()
    return BeautifulSoup(data, "lxml")
11
+
12
def selectChoice():
    """Show the chart menu and keep prompting until a valid number is given.

    The menu is printed to stdout, one "<number>) <label>" line per entry.
    Input that is not an integer, or an integer that is not a menu number,
    prints 'Wrong choice, enter again..' and prompts again.

    Returns:
        The URL slug (second tuple element) of the chosen entry, e.g.
        ``'top'`` for choice 1.
    """
    # Menu number -> (label shown to the user, slug appended to the base URL).
    options_map = {
        1: ('Top movies', 'top'),
        2: ('Most Popular Movies', 'moviemeter'),
        3: ('Top English Movies', 'top-english-movies'),
        4: ('Top TV Shows', 'toptv'),
        5: ('Most Popular TV Shows', 'tvmeter'),
        6: ('Low Rated Movies', 'bottom'),
        7: ('Top Box Office collection', 'boxoffice'),
    }

    # Print the dict key itself so the number shown is always the number
    # that selects the entry.
    for number, (label, _slug) in options_map.items():
        print("{}) {}".format(number, label))

    while True:
        try:
            choice = int(input('\n Choice please..\n '))
        except ValueError:
            # Non-numeric input used to abort the program with a traceback;
            # treat it like any other wrong choice instead.
            choice = None
        if choice in options_map:
            return options_map[choice][1]
        print('Wrong choice, enter again..')
42
+
43
def getData(base_url, option):
    """Download one chart page and print its entries as a grid table.

    Args:
        base_url: URL prefix of the chart pages.
        option: Chart slug appended directly to ``base_url``.

    Returns:
        None. The table (columns "Index", "Name", "Ratings") is printed to
        stdout.
    """
    missing = "N/A"  # placeholder for a card lacking the expected tag

    soup = getResponse(base_url + option)
    # Each entry on the page is a span carrying these two classes
    # (the "material card" list).
    card_list = soup.find_all('span', {'class': 'media-body media-vertical-align'})

    result = []
    for count, card in enumerate(card_list, 1):
        title_tag = card.find('h4')
        rating_tag = card.find('p')

        # find() returns None when the tag is absent. The previous bare
        # "except: pass" then either raised NameError (first card) or
        # silently reused the previous card's value; use a placeholder.
        if title_tag is None:
            name = missing
        else:
            # lstrip removes the leading order index ("1.", "2.", ...).
            name = title_tag.text.replace("\n ", " ").lstrip("0123456789.- ")

        rating = missing if rating_tag is None else rating_tag.text.strip()

        result.append([count, name, rating])

    print(tabulate(result, headers=["Index", "Name", "Ratings"], tablefmt="grid"))
61
+
62
def main():
    """Ask the user which chart to show, then fetch and print it."""
    chart_slug = selectChoice()
    getData("http://m.imdb.com/chart/", chart_slug)
67
+
68
# Run the interactive flow only when executed as a script, not on import.
if __name__ == '__main__':
    main()
70
+
71
+
72
+ '''
73
+ # Table formats accepted by tabulate's tablefmt argument:
74
+ - "plain"
75
+ - "simple"
76
+ - "grid"
77
+ - "fancy_grid"
78
+ - "pipe"
79
+ - "orgtbl"
80
+ - "jira"
81
+ - "presto"
82
+ - "psql"
83
+ - "rst"
84
+ - "mediawiki"
85
+ - "moinmoin"
86
+ - "youtrack"
87
+ - "html"
88
+ - "latex"
89
+ - "latex_raw"
90
+ - "latex_booktabs"
91
+ - "textile"
92
+ '''
0 commit comments