|
| 1 | +#!/usr/bin/env python |
| 2 | +# encoding: utf-8 |
| 3 | +# author: Lock |
| 4 | +# time: 2017/12/21 18:28 |
| 5 | +# 多线程文件下载器,默认单线程 |
| 6 | + |
| 7 | +import sys |
| 8 | +import optparse |
| 9 | +import threading |
| 10 | +import requests |
| 11 | +import re |
| 12 | +import time |
| 13 | + |
| 14 | + |
class Download(object):
    """Download a single URL into the current directory, optionally in parallel.

    When the server answers a ranged probe with a usable ``Content-Range``
    header, the file is split into byte ranges and each range is fetched by
    its own thread. Otherwise one plain (un-ranged) request is used.

    ``config_dict`` must provide these keys:

    * ``url`` -- resource to download; the text after the last ``/`` becomes
      the local file name.
    * ``thread`` -- requested number of worker threads.
    * ``user_agent`` -- value sent as the ``User-Agent`` header.
    * ``show_print`` -- the string ``'yes'`` enables per-part progress and
      error output; any other value disables it.
    """

    # Characters removed from the file name derived from the URL.
    _UNSAFE_NAME_CHARS = re.compile(r'[\\/:*?"<>|]')
    # Matches e.g. "bytes 0-4/12345" and captures the total size.
    _CONTENT_RANGE = re.compile(r'^bytes\s+\d+-\d+/(\d+)$')
    # Used when the URL ends with "/" and therefore yields no file name.
    _DEFAULT_NAME = 'download'
    # Number of bytes requested from the response per write.
    _CHUNK_SIZE = 64 * 1024

    def __init__(self, config_dict):
        self.url = config_dict['url']
        self.filename = self.clear_name(config_dict['url'].split('/')[-1])
        if not self.filename:
            # open('') would fail later, so fall back to a fixed name.
            self.filename = self._DEFAULT_NAME
        self.thread = config_dict['thread']
        self.user_agent = config_dict['user_agent']
        self.fileSize = 0
        self.supportThread = True
        self.show_print = config_dict['show_print'] == 'yes'
        # thread ids of the parts that could not be downloaded
        self.failed_parts = []

    def clear_name(self, filename):
        """Return ``filename`` without the characters ``\\ / : * ? " < > |``."""
        return self._UNSAFE_NAME_CHARS.sub('', filename)

    @staticmethod
    def _part_ranges(file_size, thread_count):
        """Split ``file_size`` bytes into inclusive ``(start, stop)`` ranges.

        The number of ranges is ``thread_count`` clamped to ``1..file_size``;
        the last range absorbs the remainder. Returns an empty list when
        ``file_size`` is not positive.
        """
        if file_size <= 0:
            return []
        count = max(1, min(thread_count, file_size))
        part = file_size // count
        ranges = []
        for i in range(count):
            start = part * i
            # HTTP byte ranges are inclusive, hence the "- 1".
            stop = file_size - 1 if i == count - 1 else start + part - 1
            ranges.append((start, stop))
        return ranges

    def init_file_info(self):
        """Probe the server for the file size and for Range support.

        Sets ``self.fileSize`` (0 when the size cannot be determined) and
        ``self.supportThread``.

        Returns True when ranged requests are supported, False otherwise.
        """
        headers = {
            'User-Agent': self.user_agent,
            'Range': 'bytes=0-4'
        }
        self.fileSize = 0
        try:
            # HEAD does not follow redirects unless asked to, while the
            # download itself does; probe the same final resource.
            r = requests.head(self.url, headers=headers, allow_redirects=True)
        except Exception as e:
            self.supportThread = False
            print('can not support multi thread download , error:%s' % (e,))
            return False

        try:
            rang_content = r.headers['content-range']
            match = self._CONTENT_RANGE.match(rang_content.strip())
            self.fileSize = int(match.group(1))
            self.supportThread = True
            return True
        except Exception as e:
            print('can not support breakpoint download,msg:%s' % (e,))

        # No usable Content-Range: ranged requests cannot be relied on, so
        # the download must use one plain request.
        self.supportThread = False
        try:
            if r.status_code != 200:
                # For any other status Content-Length does not describe the
                # whole file (e.g. a partial response or an error page).
                raise ValueError('unexpected status code %s' % (r.status_code,))
            self.fileSize = int(r.headers['content-length'])
        except Exception as e:
            print('can not support multi thread download , error:%s' % (e,))
        return False

    def start_part_download(self, thread_id, start_index, stop_index):
        """Fetch one part of the file and write it into ``self.filename``.

        With Range support the inclusive byte range
        ``start_index``-``stop_index`` is requested and written at offset
        ``start_index``. Without it the whole resource is requested and
        written from offset 0. The target file must already exist.

        Failures are not raised: ``thread_id`` is appended to
        ``self.failed_parts`` and, when ``show_print`` is set, a message is
        written to stdout.
        """
        headers = {'User-Agent': self.user_agent}
        if self.supportThread:
            headers['Range'] = 'bytes=%d-%d' % (start_index, stop_index,)
            expected_status = 206
            offset = start_index
        else:
            expected_status = 200
            offset = 0
        # Kept outside the try block so the error message can always use it.
        status = None
        try:
            r = requests.get(self.url, headers=headers, stream=True, allow_redirects=True)
            try:
                status = r.status_code
                if status != expected_status:
                    raise IOError('unexpected status code')
                written = 0
                with open(self.filename, "rb+") as fp:
                    fp.seek(offset)
                    # Stream to disk instead of holding the part in memory.
                    for chunk in r.iter_content(chunk_size=self._CHUNK_SIZE):
                        if chunk:
                            fp.write(chunk)
                            written += len(chunk)
            finally:
                r.close()
            if self.show_print:
                sys.stdout.write('thread %s download part size:%.2f KB\n' % (thread_id, written / 1024.0))
                sys.stdout.flush()
        except Exception as e:
            self.failed_parts.append(thread_id)
            if self.show_print:
                sys.stdout.write('下载出现错误,错误位置:%s,状态码:%s,错误信息:%s\n' % (start_index, status, e))
                sys.stdout.flush()

    def run(self):
        """Probe the target, download every part and report the outcome."""
        print('Start...')
        start_time = time.time()
        self.failed_parts = []
        self.init_file_info()
        if self.fileSize <= 0:
            print('Can not download')
            return

        # Pre-allocate a file as large as the download so that every part
        # can be written at its own offset.
        with open(self.filename, "wb") as fp:
            fp.truncate(self.fileSize)

        if self.supportThread is False and self.thread > 1:
            print('sorry,only support single thread')
            self.thread = 1
        ranges = self._part_ranges(self.fileSize, self.thread)
        print('Thread count is:%s' % (len(ranges),))

        workers = []
        for thread_id, (start_index, stop_index) in enumerate(ranges):
            download_args = {'thread_id': thread_id, 'start_index': start_index, 'stop_index': stop_index}
            worker = threading.Thread(target=self.start_part_download, kwargs=download_args)
            worker.daemon = True
            worker.start()
            workers.append(worker)

        # Wait only for our own workers. Joining with a timeout keeps the
        # main thread responsive to Ctrl-C (an untimed join can block signal
        # handling on Python 2).
        for worker in workers:
            while worker.is_alive():
                worker.join(0.5)

        if self.failed_parts:
            print('Download failed, %d part(s) could not be fetched' % (len(self.failed_parts),))
            return
        print('Success.\nTime:%.2fs , Size:%.2fKB' % (time.time() - start_time, self.fileSize / 1024.0))
| 99 | + |
| 100 | + |
| 101 | +if __name__ == '__main__': |
| 102 | + parser = optparse.OptionParser(usage='python %s.py [options]' % (sys.argv[0],)) |
| 103 | + parser.add_option('-u', dest='url', type='string', help='specify download resource url') |
| 104 | + parser.add_option('-t', dest='thread', type='int', help='specify download thread count', default=1) |
| 105 | + parser.add_option('-p', dest='show_print', type='string', help='yes/no,show print info,default enable', default='yes') |
| 106 | + parser.add_option("-a", dest="user_agent", help="specify request user agent", default='Mozilla/5.0 (Macintosh; Intel Mac OS X 10.12; rv:57.0) Gecko/20100101 Firefox/57.0') |
| 107 | + (options, args) = parser.parse_args() |
| 108 | + if options.url is None: |
| 109 | + parser.print_help() |
| 110 | + exit() |
| 111 | + config = { |
| 112 | + 'url': options.url, |
| 113 | + 'thread': options.thread, |
| 114 | + 'user_agent': options.user_agent, |
| 115 | + 'show_print': options.show_print |
| 116 | + } |
| 117 | + try: |
| 118 | + Download(config).run() |
| 119 | + except KeyboardInterrupt: |
| 120 | + print '\nCancel Download' |
0 commit comments