# -*- coding: utf-8 -*-
"""Download every image shown on a photostock.china.com.cn topic page.

The script fetches the special-topic page, extracts the ``src`` of each
``<img alt=...>`` tag with a regular expression, prefixes the site root and
saves the images as ``lovelyAnimals/100.jpg``, ``lovelyAnimals/101.jpg``, ...

The code runs unchanged on Python 2 and Python 3.
"""
import os
import re
import sys

import requests

# Python 2 only: make UTF-8 the implicit str/unicode codec, exactly as the
# script always did.  Python 3 has neither a reload() builtin nor
# sys.setdefaultencoding() (and already defaults to UTF-8), so skip it there.
if sys.version_info[0] == 2:
    reload(sys)  # noqa: F821 - builtin on Python 2
    sys.setdefaultencoding('utf-8')

SITE_ROOT = 'http://photostock.china.com.cn'
TOPIC_URL = 'http://photostock.china.com.cn/Web_CHN/SpecialTopicPhoto.aspx?Id=296'
OUTPUT_DIR = 'lovelyAnimals'
FIRST_PICTURE_NUMBER = 100
REQUEST_TIMEOUT = 30  # seconds; without it a stalled server hangs the script

# NOTE: the two dots after `src="` are unescaped, so they match ANY two
# characters; presumably they are meant to drop the leading ".." of a
# relative path such as "../dir/pic.jpg" -- verify against the page markup.
# re.S lets a single tag span several lines.
IMG_SRC_PATTERN = re.compile(r'<img alt=.*?src="..(.*?)".*?/>', re.S)


def _extract_image_urls(page_html):
    """Return the absolute URL of every ``<img alt=...>`` found in *page_html*.

    Each captured path is appended to ``SITE_ROOT``; an empty list is
    returned when the page contains no matching tag.
    """
    return [SITE_ROOT + src for src in IMG_SRC_PATTERN.findall(page_html)]


def _download_images(image_urls, out_dir=OUTPUT_DIR,
                     first_number=FIRST_PICTURE_NUMBER):
    """Save each URL in *image_urls* as ``<out_dir>/<number>.jpg``.

    Numbering starts at *first_number* and advances once per URL, so a
    failed download leaves a gap instead of shifting later file names.
    Failed downloads are reported on stderr and skipped.
    """
    # open() does not create missing directories.
    if not os.path.isdir(out_dir):
        os.makedirs(out_dir)

    for number, image_url in enumerate(image_urls, first_number):
        try:
            response = requests.get(image_url, timeout=REQUEST_TIMEOUT)
            # Do not store an HTML error page under a .jpg name.
            response.raise_for_status()
        except requests.RequestException as error:
            sys.stderr.write('skipping %s: %s\n' % (image_url, error))
            continue

        target = os.path.join(out_dir, str(number) + '.jpg')
        with open(target, 'wb') as image_file:
            image_file.write(response.content)


def main():
    """Fetch the topic page and download all images it references.

    Raises:
        requests.RequestException: if the topic page itself cannot be
            fetched or answers with an HTTP error status.
    """
    page = requests.get(TOPIC_URL, timeout=REQUEST_TIMEOUT)
    page.raise_for_status()
    _download_images(_extract_image_urls(page.text))


if __name__ == '__main__':
    main()


# ---------------------------------------------------------------------------
# Notes: three ways to download a file (snippets use the Python 2 APIs).
#
# (1) urllib.urlretrieve -- the callback function can display the download
#     progress:
#
#     def callbackfunc(blocknum, blocksize, totalsize):
#         # Callback function.
#         # @blocknum:  number of data blocks downloaded so far
#         # @blocksize: size of one data block
#         # @totalsize: size of the remote file
#         percent = 100.0 * blocknum * blocksize / totalsize
#         if percent > 100:
#             percent = 100
#         print "%.2f%%" % percent
#
#     url = 'http://www.sina.com.cn'
#     local = 'lovelyAnimals/sina.html'
#     urllib.urlretrieve(url, local, callbackfunc)
#
# (2) urllib2.urlopen:
#
#     import urllib2
#     url = 'http://www.sina.com.cn'
#     f = urllib2.urlopen(url)
#     data = f.read()
#     with open("lovelyAnimals/sina.html", "wb") as code:
#         code.write(data)
#
# (3) the requests module:
#
#     import requests
#     url = 'http://www.sina.com.cn'
#     html = requests.get(url)
#     with open("lovelyAnimals/sina.html", "wb") as code:
#         code.write(html.content)
# ---------------------------------------------------------------------------
# Time: 2024-10-10 03:55:19