Python 写的简单有效的爬虫代码
by 伍雪颖
import re
import urllib

try:
    # Python 3 moved urlopen/urlretrieve into urllib.request.
    import urllib.request as _urllib
except ImportError:
    # Python 2: both functions live directly on the urllib module.
    _urllib = urllib

# Captures the value of a src="..." attribute that ends in ".jpg".
# [^"]* keeps the match inside a single attribute value; a plain ".*?" would
# happily run across the closing quote into a later attribute.
_JPG_SRC_RE = re.compile(r'src="([^"]*\.jpg)"')


def getHtml(url):
    """Fetch ``url`` and return the response body.

    Args:
        url: Address to open with ``urlopen``.

    Returns:
        The body as ``str``. On Python 3 the raw bytes are decoded using the
        charset advertised in the response headers (UTF-8 when none is given
        or the advertised codec is unknown); undecodable bytes are replaced
        rather than raising. On Python 2 the body is returned as read.
    """
    response = _urllib.urlopen(url)
    try:
        body = response.read()
    finally:
        # Release the connection even if read() fails.
        response.close()

    if not isinstance(body, str):
        # Python 3 hands back bytes; getImage matches with a str pattern.
        charset = response.headers.get_content_charset() or 'utf-8'
        try:
            body = body.decode(charset, 'replace')
        except LookupError:
            # The server named a codec Python does not know.
            body = body.decode('utf-8', 'replace')
    return body


def getImage(source):
    """Download every ``.jpg`` referenced by a ``src="..."`` attribute.

    Images are written to the current working directory as ``0.jpg``,
    ``1.jpg``, ... in the order they appear in ``source``.

    Args:
        source: HTML text to scan.

    Returns:
        List of the file names that were written (empty if nothing matched).
    """
    saved = []
    for index, image_url in enumerate(_JPG_SRC_RE.findall(source)):
        filename = '%s.jpg' % index
        _urllib.urlretrieve(image_url, filename)
        saved.append(filename)
    return saved


if __name__ == '__main__':
    source = getHtml('http://tieba.baidu.com/p/3237470549')
    # Single-argument print(...) behaves the same on Python 2 and 3.
    print(getImage(source))
Python 写的简单有效的爬虫代码
时间: 2024-10-24 23:44:05