|
| 1 | +# encoding: utf-8 |
| 2 | +# using Python 3.5 |
| 3 | +from bs4 import BeautifulSoup as BS |
| 4 | +from urllib import request |
| 5 | +import os |
| 6 | + |
# Download helper
def download(target):
    """Fetch the example page linked by *target* and save its source file.

    The page at ``baseUrl + target['href']`` is downloaded and parsed; the
    first ``<a class="external">`` link found on it is treated as the file
    to download. The file is stored under ``examples/<dirName>/`` through
    ``makeFile``.

    Args:
        target: An object supporting ``target['href']`` (the caller passes
            anchor tags parsed by BeautifulSoup). The href is relative to
            the matplotlib examples root.

    Returns:
        None. Returns early, without downloading anything, when the page
        has no usable ``external`` link.
    """
    baseUrl = "http://matplotlib.org/examples/"
    url = target['href']

    # Folder name: everything before the first '/'. An href without any '/'
    # maps to the top-level examples folder instead of losing its last
    # character to a -1 slice index.
    dirName, slash, _ = url.partition('/')
    if not slash:
        dirName = ''

    # Create the destination folder (and 'examples' itself) if it is missing.
    # os.makedirs is portable and does not hand scraped text to a shell.
    os.makedirs(os.path.join('examples', dirName), exist_ok=True)

    with request.urlopen(baseUrl + url) as page:
        response = page.read()
    soup = BS(response, 'html.parser', from_encoding='utf-8')

    # Locate the download link on the opened page; nothing to do without one.
    # find() returns None when no tag matches (TypeError on subscript), and a
    # tag without an href raises KeyError.
    try:
        fileName = soup.find('a', class_='external')['href']
    except (TypeError, KeyError):
        return
    print('Trying {}'.format(fileName))

    # Full URL that can be fetched directly
    prefix = dirName + '/' if dirName else ''
    wholeFileUrl = baseUrl + prefix + fileName

    # Raw file contents
    with request.urlopen(wholeFileUrl) as remote:
        file = remote.read()

    makeFile(file, fileName, dirName)
    print('File {} done!'.format(fileName))
| 37 | + |
# File writer
def makeFile(file, fileName, dirName):
    """Write downloaded bytes to ``examples/<dirName>/<fileName>``.

    Args:
        file: The bytes to write.
        fileName: Name (or '/'-separated path) of the file; only the part
            after the last '/' is used, as computed by ``checkFileName``.
        dirName: Sub-folder of ``examples`` to write into.

    Returns:
        None. If the destination cannot be opened the file is skipped and a
        short message is printed; errors raised while writing propagate.
    """
    fileName = checkFileName(fileName)
    path = os.path.join('examples', dirName, fileName)
    try:
        f = open(path, 'wb')
    except OSError as err:
        # Best effort: an unopenable destination must not abort the whole run
        print('Skipping {}: {}'.format(path, err))
        return
    # The context manager closes the handle even if write() fails
    with f:
        f.write(file)
| 47 | + |
# File name normalizer
def checkFileName(fileName):
    """Return the part of *fileName* that follows its last '/'.

    Args:
        fileName: A file name, optionally prefixed by a '/'-separated path.

    Returns:
        The text after the final '/'. A name without any '/' is returned
        unchanged; a name ending in '/' yields an empty string.
    """
    # rpartition splits on the last '/'; with no '/' present the whole
    # string lands in the final element, so no special case is needed.
    return fileName.rpartition('/')[2]
| 63 | + |
# Checkpoint persistence (resume support)
def makeBreakPoint(breakPoint, mode):
    """Load or store the resume checkpoint kept in ``breakPoint.ini``.

    Args:
        breakPoint: Text to store when *mode* is ``'w'``; not used when
            reading.
        mode: ``'r'`` to read the stored checkpoint, ``'w'`` to overwrite it.
            The value is passed straight to ``open``.

    Returns:
        For ``'r'``: the file contents with surrounding whitespace stripped,
        or an empty string when ``breakPoint.ini`` does not exist yet.
        For any other mode: None.
    """
    if mode == 'r':
        try:
            with open('breakPoint.ini', mode) as f:
                return f.read().strip()
        except FileNotFoundError:
            # No checkpoint has been saved so far
            return ""

    # The with-statement closes the file; no explicit close() is required
    with open('breakPoint.ini', mode) as f:
        if mode == 'w':
            f.write(breakPoint)
if __name__ == "__main__":
    # Read the saved index page as bytes so that BeautifulSoup performs the
    # decoding with from_encoding (it ignores that argument for text input),
    # and close the file as soon as parsing is done.
    with open('source.html', 'rb') as html:
        soup = BS(html, 'html.parser', from_encoding='utf-8')
    targets = soup.find('div', id='target').find_all('a')

    # Set the checkpoint by hand here; when left empty it is read from the
    # saved configuration instead.
    breakPoint = "fancytextbox_demo"
    if breakPoint == "":
        breakPoint = makeBreakPoint(breakPoint, 'r')
    if breakPoint:
        breakPointDone = False
        for target in targets:
            if target.string == breakPoint:
                # Links before the checkpoint are skipped; this one is
                # downloaded again and everything after it follows.
                download(target)
                breakPointDone = True
            elif breakPointDone:
                # Record the link before downloading it, so an interrupted
                # run resumes from this link.
                breakPoint = target.string
                makeBreakPoint(breakPoint, 'w')
                download(target)
0 commit comments