"""Crawl http://www.cabintu.com and print image URLs.

The script loads the start page, follows every link found under
``li.airline`` and prints the ``src`` of each ``img.plane`` element on the
linked pages.
"""
from lxml import etree
import requests


def getHtml(html, timeout=30):
    """Download a page and parse it into an lxml element tree.

    Args:
        html: URL of the page to fetch (despite the name, this is the
            address that is requested, not markup).
        timeout: Seconds passed to ``requests.get`` as its ``timeout`` so a
            stalled server cannot block the crawl indefinitely.

    Returns:
        The result of ``etree.HTML`` applied to the raw response body.
    """
    novelcontent = requests.get(html, timeout=timeout).content
    return etree.HTML(novelcontent)


source = getHtml("http://www.cabintu.com")
urllist = source.xpath('//li[@class="airline"]/a')
for i in urllist:
    url = i.attrib.get('href')
    if not url:
        # An anchor without a target cannot be followed; skip it instead of
        # aborting the whole crawl with a KeyError.
        continue
    sources = getHtml(url)
    picurl = sources.xpath('//img[@class="plane"]')
    for j in picurl:
        urls = j.attrib.get('src')
        if not urls:
            # Image tag without a source: nothing to report.
            continue
        # Single-argument print() behaves the same on Python 2 and Python 3.
        print(urls)

# Alternative XPath queries. They were disabled in the original (wrapped in a
# triple-quoted string) and are kept here for reference only.
#
# listclassify = source.xpath('//ul[@class="sg_menu"]/li/a')
# listtype = source.xpath('//div[@class="mainleft"]/ul[@class="sg_menu"]/li[@class="section"]/ul[@class="subnav_a"]/li[@class="airline"]/a')
# fname = source.xpath('//div[@class="mainleft"]/ul[@class="sg_menu"]/li[@class="section"]/a/text()')
# for a in fname:
#     print(a)
# typelist = source.xpath('//div[@class="mainleft"]/ul[@class="sg_menu"]/li[@class="section"]//ul[@class="subnav_a"]/li[@class="airline"]/a/text()')
# for b in typelist:
#     print(b)
# Time: 2024-08-03 03:02:25