其实只是想练习一下threading的用法。
写完后发现其实下载速度也没增加多少，略显尴尬，汗。
# -*- coding: cp936 -*-
"""Multi-threaded HTTP downloader.

The target file is split into ``Thread_Num`` byte ranges; each range is
fetched by its own ``mpdown`` thread using an HTTP ``Range`` header and
written into a pre-sized output file at the matching offset.

Runs on Python 2.7 and Python 3.
"""
try:
    import urllib2  # Python 2
except ImportError:
    # Python 3: urllib.request offers the urlopen/Request API used below.
    import urllib.request as urllib2
import threading
import os
import os.path
import sys
import time
import datetime

url = ''                      # URL currently being downloaded (set by main)
path = os.getcwd()            # directory the output file is created in
file_name = ''                # output file name, set by file_write_init
file_fp = ''                  # open output file object, set by file_write_init
net_filesize = 0              # size reported by the server, in bytes
size_download = 0             # bytes written so far for the current file
threadlock = threading.Lock() # serialises seek+write on the shared file
Thread_Num = 10               # number of ranges / worker threads
Block_Size = 16384            # maximum bytes requested per read() call


def get_filename_from_url(url):
    """Return the part of *url* after the last '/' (all of it if none)."""
    return url[url.rfind('/') + 1:]


def _filename_from_disposition(disposition):
    """Return the bare file name in a Content-Disposition value, or ''.

    Directory components are dropped because the value comes from the
    remote server and is later joined onto the local download directory.
    """
    marker = 'filename='
    idx = disposition.find(marker)
    if idx < 0:
        return ''
    value = disposition[idx + len(marker):].strip()
    if value[:1] in ('"', "'"):
        # Quoted value: take everything up to the matching closing quote.
        end = value.find(value[0], 1)
        value = value[1:end] if end > 0 else value[1:]
    else:
        # Unquoted value ends at the next parameter separator.
        value = value.split(';')[0]
    value = value.replace('"', '').replace("'", "").strip()
    # Treat backslashes as separators too so basename works on any OS.
    name = os.path.basename(value.replace('\\', '/'))
    if name in ('.', '..'):
        return ''
    return name


def get_net_file_info(url):
    """Return ``(size_in_bytes, file_name)`` for the resource at *url*.

    The size comes from the Content-Length header. The name comes from
    Content-Disposition when it carries a usable ``filename=``; otherwise
    it is derived from the URL.

    Raises:
        KeyError: the response has no Content-Length header.
    """
    r = urllib2.urlopen(url)
    try:
        headers = r.info()
        length = headers.get('Content-Length')
        if length is None:
            raise KeyError('Content-Length')
        fs = int(length)
        fn = _filename_from_disposition(headers.get('Content-Disposition') or '')
        if not fn:
            fn = get_filename_from_url(url)
    finally:
        # Always release the connection, even when a header is missing.
        r.close()
    return fs, fn


def file_write_init():
    """Query the remote file and create the pre-sized local output file.

    Sets the ``net_filesize``, ``file_name`` and ``file_fp`` globals and
    resets ``size_download``.

    Returns:
        False if a file with the target name already exists (nothing is
        opened in that case), True otherwise.
    """
    global file_fp, net_filesize, file_name, size_download
    net_filesize, file_name = get_net_file_info(url)
    full_path = os.path.join(path, file_name)
    if os.path.isfile(full_path):
        return False
    size_download = 0
    file_fp = open(full_path, "wb")
    # Reserve the full size up front so threads can write at any offset.
    file_fp.truncate(net_filesize)
    return True


def file_write(pos, dat):
    """Write *dat* at byte offset *pos* of the shared output file.

    The lock is held for seek+write so concurrent threads cannot move the
    file position between the two calls; ``with`` guarantees it is
    released even if the write raises.
    """
    global size_download
    with threadlock:
        size_download += len(dat)
        file_fp.seek(pos)
        file_fp.write(dat)


def file_write_finish():
    """Close the shared output file."""
    file_fp.close()


class mpdown(threading.Thread):
    """Worker thread that downloads one inclusive byte range of ``url``."""

    def __init__(self, range_start, range_end):
        """Remember the inclusive byte range this thread must fetch."""
        threading.Thread.__init__(self)
        self.range_start = range_start
        self.range_end = range_end

    def run(self):
        """Fetch the range in blocks and write each at its file offset.

        Raises:
            IOError: the server closed the connection before the whole
                range was received.
        """
        req = urllib2.Request(url)
        req.add_header('Range', 'bytes=%d-%d' % (self.range_start, self.range_end))
        # NOTE(review): a server that ignores Range answers with the whole
        # body from offset 0; that case is not detected here.
        r = urllib2.urlopen(req)
        try:
            pos_base = self.range_start
            pos = 0
            size_to_get = self.range_end - self.range_start + 1
            while pos < size_to_get:
                # Never ask for more than what is left of this range.
                dat = r.read(min(Block_Size, size_to_get - pos))
                if not dat:
                    # An empty read means EOF; without this check the loop
                    # would spin forever on a truncated response.
                    raise IOError(
                        'connection closed after %d of %d bytes (range %d-%d)'
                        % (pos, size_to_get, self.range_start, self.range_end))
                file_write(pos + pos_base, dat)
                pos += len(dat)
        finally:
            r.close()


def download_one_thread():
    """Placeholder for a single-threaded download path (not implemented)."""
    pass


def _split_ranges(total_size, parts):
    """Return inclusive ``(start, end)`` ranges covering ``total_size`` bytes.

    The size is divided evenly over *parts*; the remainder goes to the
    first range. Ranges that would be empty (file smaller than *parts*)
    are omitted so no inverted Range header is ever sent.
    """
    chunk, extra = divmod(total_size, parts)
    ranges = []
    pos = 0
    for i in range(parts):
        size = chunk + (extra if i == 0 else 0)
        if size <= 0:
            continue
        ranges.append((pos, pos + size - 1))
        pos += size
    return ranges


def _format_elapsed(s):
    """Format *s* whole seconds as the 'time passed' report line."""
    return 'time passed:%dh %dm %ds' % (s // 3600, (s % 3600) // 60, s % 60)


def download():
    """Download the global ``url`` with ``Thread_Num`` threads and report.

    Does nothing except print a notice when the target file already exists.
    """
    # TODO: check that the server accepts Range requests, and fall back to
    # download_one_thread() for very small files.
    time_start = datetime.datetime.now()
    if not file_write_init():
        print('file already exists, skipped:%s' % (file_name))
        return
    print('file name:%s' % (file_name))
    print('net_filesize:%dbyte' % (net_filesize))
    print('multi process downloading...')
    threads = [mpdown(range_start, range_end)
               for range_start, range_end in _split_ranges(net_filesize, Thread_Num)]
    for t in threads:
        t.start()
    for t in threads:
        t.join()
    file_write_finish()
    time_end = datetime.datetime.now()
    # total_seconds() includes whole days, unlike the .seconds attribute.
    s = int((time_end - time_start).total_seconds())
    if s < 1:
        s = 1  # avoid dividing by zero for sub-second downloads
    print(_format_elapsed(s))
    print('speed:%dkb/s' % (net_filesize / 1000.0 / s))


def main():
    """Download every URL given on the command line, or prompt for one."""
    global url
    if len(sys.argv) > 1:
        for arg in sys.argv[1:]:
            url = arg
            download()
    else:
        try:
            read_line = raw_input  # Python 2
        except NameError:
            read_line = input      # Python 3
        url = read_line("input download url:")
        download()


def test():
    """Manual probe: request bytes 1-500 of ``url`` and print the headers."""
    req = urllib2.Request(url)
    req.add_header('Range', 'bytes=%d-%d' % (1, 500))
    print(req)
    r = urllib2.urlopen(req)
    try:
        print(r.info())
    finally:
        r.close()


# This is a manual network probe, not a unit test; keep test runners that
# collect callables by the "test" name prefix from invoking it.
test.__test__ = False


if __name__ == '__main__':
    main()
时间: 2024-10-14 17:51:30