# -*- coding: utf-8 -*-
"""Download every ``<img>`` found on the listing pages of www.dbmeinv.com.

Targets Python 2: it relies on ``urllib2`` and ``urllib.urlretrieve``.
Images are written to ``image/<n>.jpg``, where ``n`` is a counter shared by
all pages. Running the module crawls listing pages 1 through 19.
"""
__author__ = "carry"

import os
import urllib
import urllib2
from contextlib import closing
from urlparse import urljoin

from bs4 import BeautifulSoup


# Directory (relative to the current working directory) that receives images.
IMAGE_DIR = 'image'

url = 'http://www.dbmeinv.com/?pager_offset=1'
# Number given to the next saved image; shared across all crawl() calls so
# file names keep increasing from one page to the next.
x = 1


def crawl(url):
    """Fetch one listing page and save every image it references.

    Args:
        url: Address of the HTML page to scan for ``<img>`` tags.

    Side effects:
        Creates ``IMAGE_DIR`` if it does not exist, writes one
        ``<IMAGE_DIR>/<x>.jpg`` file per image, prints progress to stdout,
        and advances the module-level counter ``x`` once per saved image.

    Raises:
        urllib2.URLError: If the page itself cannot be fetched.
        IOError: If an image cannot be downloaded or written.
    """
    global x

    # Send a browser-like User-Agent with the page request.
    headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/50.0.2661.102 Safari/537.36'}
    req = urllib2.Request(url, headers=headers)
    # closing() guarantees the HTTP response is released even if read() fails;
    # Python 2 response objects are not context managers on their own.
    with closing(urllib2.urlopen(req, timeout=20)) as response:
        contents = response.read()
    soup = BeautifulSoup(contents, 'html.parser')

    # urlretrieve does not create missing directories.
    if not os.path.isdir(IMAGE_DIR):
        os.makedirs(IMAGE_DIR)

    for girl in soup.find_all('img'):
        src = girl.get('src')
        if not src:
            # <img> without a usable src attribute: nothing to download.
            continue
        # Resolve relative and protocol-relative sources against the page
        # URL; absolute links pass through unchanged.
        link = urljoin(url, src)
        print(link)
        # os.path.join keeps the path portable instead of hard-coding '\'.
        urllib.urlretrieve(link, os.path.join(IMAGE_DIR, '%s.jpg' % x))
        print("正在下载第%s张" % x)
        x += 1


for page in range(1, 20):
    url = 'http://www.dbmeinv.com/?pager_offset=%d' % page
    crawl(url)
print("图片下载完毕")
# Time: 2024-11-05 12:32:13