# Python 2.x
# -*- coding: utf-8 -*-
"""Download every ``.jpg`` image referenced by a Baidu Tieba thread page.

Written for Python 2.x; the import shim below lets the same code run
unchanged on Python 3.
"""
import re
import urllib

try:
    # Python 3 moved these helpers into urllib.request.
    from urllib.request import urlopen, urlretrieve
except ImportError:
    # Python 2 exposes them directly on the top-level urllib module.
    urlopen, urlretrieve = urllib.urlopen, urllib.urlretrieve

# Thread whose images are downloaded when the file is run as a script.
THREAD_URL = 'http://tieba.baidu.com/p/4872795764'

# The `?` makes `.*` non-greedy, so each match is as short as possible
# instead of swallowing several tags at once.  Combined with findall(),
# only the text captured by the group (the image URL) is returned.
IMAGE_PATTERN = re.compile(r'src="(.*?\.jpg)" size')


def gethtml(url):
    """Fetch *url* and return the page source as text.

    Args:
        url: Address of the page to download.

    Returns:
        The response body as a native ``str``.
    """
    response = urlopen(url)
    try:
        raw = response.read()
    finally:
        # Release the connection even when read() fails.
        response.close()
    # Python 3 yields bytes, which the text pattern in getimg() cannot
    # search.  On Python 2 the body already is a `str` and is returned
    # untouched.  NOTE(review): assumes the page is UTF-8 -- undecodable
    # bytes are replaced rather than raising.
    if not isinstance(raw, str):
        raw = raw.decode('utf-8', 'replace')
    return raw


def getimg(html, name_template='mac_book_Pro_%s.jpg'):
    """Download every image URL found in *html* to a local file.

    Args:
        html: Page source (text, e.g. the value returned by gethtml())
            scanned for ``src="....jpg" size`` attributes.
        name_template: ``%``-format string that receives the zero-based
            index of the image and yields the destination file name.

    Returns:
        The list of file names written, in download order; empty when no
        image URL matches.
    """
    imglist = IMAGE_PATTERN.findall(html)
    print(imglist)
    saved = []
    for count, imgurl in enumerate(imglist):
        filename = name_template % count
        urlretrieve(imgurl, filename=filename)
        saved.append(filename)
    return saved


def main():
    """Download all images of the thread at THREAD_URL."""
    getimg(gethtml(THREAD_URL))


if __name__ == '__main__':
    # Guarded so importing this module does not trigger network traffic.
    main()
# Time: 2024-11-09 14:46:20