import scrapy
from scrapy.http.request import Request

from xxxx.items import XXXXItem


class ZndsSpider(scrapy.spiders.Spider):
    """Spider that fetches ``start_urls`` with preset cookies and scrapes table rows.

    Every request is sent with the ``cookie`` mapping attached, and every
    response is handled by :meth:`parse_url`, which yields one ``XXXXItem``
    per node matched by the row XPath.
    """

    name = "xxxx"
    allowed_domains = ["xxxx.com"]
    start_urls = []
    # Cookies attached to every request issued by start_requests().
    # NOTE(review): both keys are the same placeholder, so the dict ends up
    # with a single entry -- replace with the real cookie names/values.
    cookie = {"xxx": "xxx", "xxx": "xxx"}

    def start_requests(self):
        """Yield one ``Request`` per URL in ``start_urls``.

        Each request carries ``self.cookie`` and routes its response to
        :meth:`parse_url`.
        """
        for url in self.start_urls:
            # Load the cookies and name the callback that receives the response.
            yield Request(url, cookies=self.cookie, callback=self.parse_url)

    def parse_url(self, response):
        """Yield an ``XXXXItem`` for each node matched by the row XPath.

        Args:
            response: The response passed in by the request's callback.

        Yields:
            ``XXXXItem`` objects whose fields hold the lists returned by
            ``extract()`` for the cell XPaths below.
        """
        body = scrapy.Selector(response)
        for sel in body.xpath("xxx"):
            item = XXXXItem()
            # NOTE(review): the three assignments share the placeholder key
            # "XX", so as written each one overwrites the previous value --
            # replace with the distinct field names declared on XXXXItem.
            item["XX"] = sel.xpath("td[1]/text()").extract()
            item["XX"] = sel.xpath("td[3]/span/a/u/text()").extract()
            item["XX"] = sel.xpath("td[5]/a/text()").extract()
            yield item
# Timestamp: 2024-10-05 23:09:20