File tree 2 files changed +5
-6
lines changed
2 files changed +5
-6
lines changed Original file line number Diff line number Diff line change 11
11
second_level = "./DownLoad/Image/"
12
12
13
13
14
- def page_downloader (tar_url , host = None ):
14
+ def page_downloader (tar_url , Referer = None , host = None ):
15
15
url_content = ""
16
16
try :
17
17
url_content = requests .get (tar_url ,
Original file line number Diff line number Diff line change 6
6
page_list = []
7
7
max_page = 0
8
8
RegEx_img = "<a href=\" ([\w\/\.]+?)\" target"
9
- RegEx_max_page = "\" current-comment-page\" >[(\d*?)]</span>"
9
+ RegEx_max_page = "\" current-comment-page\" >\ [(\d*?)\ ]</span>"
10
10
11
11
#获取访问过的页面列表
12
12
rec_dict = Record .Read ()
15
15
#获取最新的页面值
16
16
content = Downloader .page_downloader ("http://jandan.net/ooxx" )
17
17
max_page = PageParser .parser (content , RegEx = RegEx_max_page )[0 ]
18
- print ("解析结果:{}" .format (PageParser .parser (content , RegEx = RegEx_max_page )))
19
18
20
19
#构造页面地址
21
- for index in range (max_page , 0 , - 1 ):
20
+ for index in range (int ( max_page ) , 0 , - 1 ):
22
21
page_list .append ("http://jandan.net/ooxx/page-{}#comments" .format (index ))
23
22
24
23
#遍历页面
43
42
for t in threads_list :
44
43
t .join ()
45
44
46
- #把最新的页面,最新的页数,访问过的页面记录
47
- Record .Write ({"page_num" : index , "page_url" : page_item , "viewed" : page_viewed })
45
+ # 把最新的页面,最新的页数,访问过的页面记录
46
+ Record .Write ({"page_num" : int ( max_page ) , "page_url" : page_list [ 0 ] , "viewed" : page_viewed })
You can’t perform that action at this time.
0 commit comments