# -*- coding: utf-8 -*-
"""Scrape image links from a Baidu Tieba thread.

Task: write a Python program that crawls pictures -- here, the photos of
Japanese girls posted in this thread :-)

    http://tieba.baidu.com/p/2166231880

Approach: match the image links with regular expressions, then download
them.

NOTE: an earlier draft of this script fetched the page with ``requests``,
matched every ``img ... src`` attribute in the whole page and printed only
the links containing ``imgsrc``. The urllib-based version below replaced it.

The code runs unchanged on Python 2 and Python 3.
"""

import re
from os.path import basename

try:  # Python 2 module names
    import urllib2
    from urlparse import urlsplit
except ImportError:  # Python 3: same functions, relocated modules
    import urllib.request as urllib2
    from urllib.parse import urlsplit

# Thread that is scraped when the file is run as a script.
url = "http://tieba.baidu.com/p/2166231880"

# Compiled once at import time; the patterns are the ones the script has
# always used.
PAGE_COUNT_RE = re.compile(r'<span class="red">(.*?)</span>')
POST_BODY_RE = re.compile(r'<cc>(.*?)</cc>')
IMG_SRC_RE = re.compile(r'img .*?src="(.*?)"')


def fetchHtml(url):
    """Download *url* and return its body as a native ``str``.

    On Python 2 the raw byte string is returned untouched. On Python 3 the
    bytes are decoded as UTF-8 with undecodable bytes replaced, so that the
    ``str`` patterns above can be applied to the result.
    NOTE(review): the page encoding is assumed, not detected -- confirm.
    """
    response = urllib2.urlopen(url)
    try:
        raw = response.read()
    finally:
        # Always release the connection, even when read() fails.
        response.close()
    if not isinstance(raw, str):
        raw = raw.decode("utf-8", "replace")
    return raw


def parsePageCount(html):
    """Return the integer inside the first ``<span class="red">`` of *html*.

    Raises:
        IndexError: if *html* contains no such span.
        ValueError: if the span content is not an integer.
    """
    return int(PAGE_COUNT_RE.findall(html)[0])


def extractImgUrls(html):
    """Return the ``src`` values of images found inside ``<cc>`` elements.

    Only text captured between ``<cc>`` and ``</cc>`` is searched; images
    elsewhere in *html* are ignored. The captured fragments are joined with
    a comma before the image pattern is applied.
    """
    posts = POST_BODY_RE.findall(html)
    return IMG_SRC_RE.findall(",".join(posts))


def saveImg(imgUrl):
    """Download *imgUrl* into the current directory; return True on success.

    The file is named after the last path component of the URL. Failures
    are reported by printing ``Er..`` and returning False, so one bad link
    does not abort the rest of a run.
    """
    fileName = basename(urlsplit(imgUrl)[2])
    if not fileName:
        # The URL path ends in "/", so there is no name to save under.
        print("Er..")
        return False
    try:
        response = urllib2.urlopen(imgUrl)
        try:
            imgData = response.read()
        finally:
            response.close()
        with open(fileName, "wb") as output:
            output.write(imgData)
    except Exception:
        # Deliberately best-effort, but unlike a bare ``except`` this lets
        # KeyboardInterrupt and SystemExit propagate.
        print("Er..")
        return False
    return True


def getPage(url):
    """Return the page count reported by the thread at *url*.

    The page is requested with ``?see_lz=1`` appended to *url*.

    Raises:
        IndexError, ValueError: see ``parsePageCount``.
    """
    return parsePageCount(fetchHtml(url + "?see_lz=1"))


def downImg(url, save=False):
    """Print every image link found in the posts of the page at *url*.

    Args:
        url: address of a single thread page.
        save: when true, also download each image via ``saveImg``.
            Defaults to False, which only lists the links.
    """
    for imgUrl in extractImgUrls(fetchHtml(url)):
        print(imgUrl)
        if save:
            saveImg(imgUrl)


def downLoad(url, save=False):
    """Run ``downImg`` on every page of the thread at *url*.

    Pages are numbered from 1 up to the count returned by ``getPage`` and
    are requested as ``<url>?see_lz=1&pn=<page>``.

    Args:
        url: base address of the thread, without a query string.
        save: forwarded to ``downImg``.
    """
    numb = getPage(url)
    print("There are " + str(numb) + " pages.")
    for cont in range(1, numb + 1):
        pageUrl = url + "?see_lz=1&pn=" + str(cont)
        print("Downloading " + pageUrl + "...")
        downImg(pageUrl, save)
    print("Completed!")


if __name__ == "__main__":
    # Same action the script has always performed: list the image links on
    # the first page. Guarded so importing the module does no network I/O.
    downImg(url)
# Timestamp: 2024-10-09 20:57:18