# Setting an explicit wait timeout makes crawling the pages noticeably faster.
# encoding: utf8
"""Scrape a paginated listing with Selenium, then print a table from each detail page.

Phase 1 pages through the listing table (``#tableData_``) and collects the
detail-page links found in its eighth column.  Phase 2 opens every collected
link, clicks a control inside ``#overview-slide``, and prints the first three
columns of the ``#tableData_two`` table.

NOTE(review): the original source was collapsed onto a single line, so block
nesting had to be reconstructed.  The detail loop is restored as a sibling of
the pagination loop, because navigating to a detail page in the middle of
pagination would break the next-page click.  Confirm against the author's
intent.
"""
import random  # unused here; kept because other parts of the file may rely on it
import re  # unused here; kept because other parts of the file may rely on it
import time

import requests  # unused here; kept because other parts of the file may rely on it
from lxml import etree  # XPath queries over the rendered page source
from selenium import webdriver  # browser automation
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait  # explicit waits

url = '(网址)'  # placeholder: replace with the listing page URL before running

BASE_URL = "http://www.sse.com.cn"
WAIT_SECONDS = 10  # upper bound for every explicit wait

# NOTE(review): the wait and the click use slightly different locators
# ("ul/li" vs "ul//li").  Both are kept exactly as in the original; confirm
# whether they are meant to address the same element.
NEXT_PAGE_WAIT_XPATH = '//*[@id="tableData_"]/div[3]/nav/ul/li[last()]/a'
NEXT_PAGE_CLICK_XPATH = '//*[@id="tableData_"]/div[3]/nav/ul//li[last()]/a'
DETAIL_HREF_XPATH = '//*[@id="tableData_"]/div[2]/table/tbody/tr/td[8]/a/@href'
OVERVIEW_LINK_XPATH = '//*[@id="overview-slide"]/div[1]/div[2]/div/a/span'
COLUMN_1_XPATH = '//*[@id="tableData_two"]/div[2]/table/tbody/tr/td[1]/text()'
COLUMN_2_XPATH = '//*[@id="tableData_two"]/div[2]/table/tbody/tr/td[2]/text()'
COLUMN_3_XPATH = '//*[@id="tableData_two"]/div[2]/table/tbody/tr/td[3]/div/text()'

thelist = []  # relative detail-page links gathered from the listing pages

driver = webdriver.Firefox()
try:
    driver.get(url)
    time.sleep(3)  # give the initial page time to render
    driver.maximize_window()

    # Phase 1: click through the pager nine times, harvesting links each time.
    # NOTE(review): the click happens before the page source is read, so the
    # initially loaded page is never scraped and the source is read right
    # after the click.  Order kept as in the original; confirm it is intended.
    for _ in range(1, 10):
        time.sleep(1)
        # WebDriverWait retries while the element is missing, up to the timeout.
        WebDriverWait(driver, WAIT_SECONDS).until(
            lambda the_driver: the_driver.find_element(
                By.XPATH, NEXT_PAGE_WAIT_XPATH
            ).is_displayed()
        )
        driver.find_element(By.XPATH, NEXT_PAGE_CLICK_XPATH).click()
        listing_tree = etree.HTML(driver.page_source)
        thelist.extend(listing_tree.xpath(DETAIL_HREF_XPATH))

    # Phase 2: visit every collected detail page and print its table.
    for href in thelist:
        driver.get(BASE_URL + href)
        WebDriverWait(driver, WAIT_SECONDS).until(
            lambda the_driver: the_driver.find_element(
                By.XPATH, OVERVIEW_LINK_XPATH
            ).is_displayed()
        )
        driver.find_element(By.XPATH, OVERVIEW_LINK_XPATH).click()
        time.sleep(2)  # let the table refresh after the click
        detail_tree = etree.HTML(driver.page_source)
        first_column = detail_tree.xpath(COLUMN_1_XPATH)
        second_column = detail_tree.xpath(COLUMN_2_XPATH)
        third_column = detail_tree.xpath(COLUMN_3_XPATH)
        # zip stops at the shortest column, so a ragged table no longer
        # raises IndexError part-way through printing.
        for cell_1, cell_2, cell_3 in zip(first_column, second_column, third_column):
            print(cell_1)
            print(cell_2)
            print(cell_3)
finally:
    # Always release the browser, even when a wait times out or a click fails.
    driver.quit()
# Time: 2024-12-24 05:26:36