# coding: utf-8
"""Selenium-driven test that searches Baidu Images and saves the results."""
import os
import re
import time
import unittest
# Star import kept from the original file; only ``urlopen`` is used below.
from urllib.request import *  # noqa: F401,F403

from bs4 import BeautifulSoup  # noqa: F401  (unused here; kept from the original)
from selenium import webdriver

# Captures, in order: image URL, file extension, width, height.
# The attribute order is hard-wired to the markup the page served when this
# was written; if Baidu reorders the data-* attributes nothing will match.
_IMAGE_PATTERN = re.compile(
    u'data-objurl="(.*?)" data-thumburl=".*?" data-fromurl=".*?" data-fromurlhost=".*?" data-ext="(.*?)" data-saved=".*?" data-pi=".*?" data-specialtype=".*?" data-cs=".*?" data-width="(.*?)" data-height="(.*?)" data-hostname=',
    re.S,
)


# Test class
class baidupic(unittest.TestCase):
    """Search image.baidu.com and download every image found in the page.

    NOTE(review): ``webdriver.PhantomJS`` and the ``find_element_by_*``
    helpers presume a Selenium 3.x installation - confirm before upgrading.
    """

    # Test initialisation
    def setUp(self):
        """Start the headless PhantomJS browser used by the test."""
        self.dv = webdriver.PhantomJS()

    # Test method
    def test_getPic(self):
        """Run the search, scroll the result page and save each matched image.

        Images are written to ``d:\\mm\\<index>.<ext>``; ``index`` only
        advances after a successful save, so file numbers stay contiguous.
        """
        dv = self.dv
        dv.get("http://image.baidu.com/")
        dv.find_element_by_id("kw").send_keys("美女")
        dv.find_element_by_class_name("s_btn").click()
        time.sleep(1)

        # Scroll to the bottom of the page; the more times we scroll, the
        # more images get loaded (and therefore downloaded).
        js = "window.scrollTo(0, document.body.scrollHeight)"
        for _ in range(2):
            dv.execute_script(js)
            time.sleep(1)

        # Extract image URL, extension, width and height with the regex.
        items = _IMAGE_PATTERN.findall(dv.page_source)

        index = 1
        for item in items:
            print("图片地址:%s\r\n类型:%s\r\n宽度:%s\r\n高度:%s\r\n " % (item[0], item[1], item[2], item[3]))
            try:
                self.saveImg(item[0], "d:\\mm\\%s.%s" % (index, item[1]))
            except Exception as err:
                # Best effort: one broken link must not abort the whole run,
                # but report it instead of swallowing it silently.
                print("下载失败:%s\r\n原因:%s\r\n " % (item[0], err))
            else:
                index = index + 1

    # Save an image to local disk
    def saveImg(self, imgURL, fileName):
        """Download ``imgURL`` and write the raw bytes to ``fileName``.

        The parent directory of ``fileName`` is created when it is missing.
        Any error raised by ``urlopen``, ``os.makedirs`` or ``open`` is
        propagated to the caller.
        """
        target_dir = os.path.dirname(fileName)
        if target_dir:
            os.makedirs(target_dir, exist_ok=True)
        # Context managers close the response and the file even on failure.
        with urlopen(imgURL) as response:
            data = response.read()
        with open(fileName, "wb") as out_file:
            out_file.write(data)

    # Test teardown
    def tearDown(self):
        """Shut down the browser started in ``setUp``."""
        self.dv.quit()
# Time: 2024-11-06 20:00:42