Skip to content

Commit 4e94ee2

Browse files
committed
煎蛋网妹子图
1 parent abb332e commit 4e94ee2

File tree

2 files changed

+5
-6
lines changed

2 files changed

+5
-6
lines changed

JandanSpider/Util/Downloader.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -11,7 +11,7 @@
1111
second_level = "./DownLoad/Image/"
1212

1313

14-
def page_downloader(tar_url, host=None):
14+
def page_downloader(tar_url, Referer=None, host=None):
1515
url_content = ""
1616
try:
1717
url_content = requests.get(tar_url,

JandanSpider/demo.py

Lines changed: 4 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -6,7 +6,7 @@
66
page_list = []
77
max_page = 0
88
RegEx_img = "<a href=\"([\w\/\.]+?)\" target"
9-
RegEx_max_page = "\"current-comment-page\">[(\d*?)]</span>"
9+
RegEx_max_page = "\"current-comment-page\">\[(\d*?)\]</span>"
1010

1111
#获取访问过的页面列表
1212
rec_dict = Record.Read()
@@ -15,10 +15,9 @@
1515
#获取最新的页面值
1616
content = Downloader.page_downloader("http://jandan.net/ooxx")
1717
max_page = PageParser.parser(content, RegEx=RegEx_max_page)[0]
18-
print("解析结果:{}".format(PageParser.parser(content, RegEx=RegEx_max_page)))
1918

2019
#构造页面地址
21-
for index in range(max_page, 0, -1):
20+
for index in range(int(max_page), 0, -1):
2221
page_list.append("http://jandan.net/ooxx/page-{}#comments".format(index))
2322

2423
#遍历页面
@@ -43,5 +42,5 @@
4342
for t in threads_list:
4443
t.join()
4544

46-
#把最新的页面,最新的页数,访问过的页面记录
47-
Record.Write({"page_num": index, "page_url": page_item, "viewed": page_viewed})
45+
# 把最新的页面,最新的页数,访问过的页面记录
46+
Record.Write({"page_num": int(max_page), "page_url": page_list[0], "viewed": page_viewed})

0 commit comments

Comments
 (0)