"""Scrape attraction listings from piao.qunar.com into a CSV file.

For each result page of a fixed keyword search, the script collects the
name, address, price, monthly sales figure and short description of every
listed sight and appends one CSV row per sight to ``CSV_PATH``.
"""
import csv
import re
import time
import urllib.parse
import urllib.request

import requests
from bs4 import BeautifulSoup

# Output file; opened in append mode, so every run adds a new header line
# followed by that run's rows.
CSV_PATH = 'C:\\Users\\惠普\\Desktop\\ac2.csv'
CSV_HEADER = ['名字', '地址', '价格', '月销量', '景点概述']

REQUEST_HEADERS = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/55.0.2883.87 Safari/537.36',
    'X-Requested-With': 'XMLHttpRequest',
}

# The published copy of this script had "&reg" decoded into the "®" sign,
# which corrupted the "region" query parameter; it is restored here.
LIST_URL = (
    'http://piao.qunar.com/ticket/list.htm'
    '?keyword=%E7%83%AD%E9%97%A8%E6%99%AF%E7%82%B9'
    '&region=&from=mpl_search_suggest&page={}'
)
EXTRA_QUERY = '&subject=&sku='

# Result pages requested: 1 through 79 inclusive.
PAGES = range(1, 80)

# Seconds to wait for the server before giving up on a request, so that a
# stalled connection cannot hang the run forever.
REQUEST_TIMEOUT = 10


def fetch_page(page):
    """Download result page number *page* and return it as parsed HTML.

    Raises:
        requests.RequestException: if the request fails or times out.
    """
    url = LIST_URL.format(page) + EXTRA_QUERY
    response = requests.get(url, headers=REQUEST_HEADERS,
                            timeout=REQUEST_TIMEOUT)
    # Name the parser explicitly so the result does not depend on which
    # optional parser libraries happen to be installed.
    return BeautifulSoup(response.text, 'html.parser')


def _text_at(tags, index, default=''):
    """Return ``tags[index].text``, or *default* when the list is too short."""
    if index < len(tags):
        return tags[index].text
    return default


def extract_rows(soup):
    """Build one CSV row per sight caption found in *soup*.

    Each row is ``[name, address, price, monthly sales, description]``.
    A sight whose price text is '免费' gets '0' for both price and sales.
    """
    names = soup.find_all('h3', {'class': 'sight_item_caption'})
    addresses = soup.find_all('p', {'class': 'address color999'})
    prices = soup.find_all('span', {'class': 'sight_item_price'})
    sales = soup.find_all('span', {'class': 'hot_num'})
    notes = soup.find_all('div', {'class': 'intro color999'})

    # NOTE(review): the five lists are matched purely by position, which
    # assumes every sight on the page has each of these elements -- confirm
    # against the live markup. Missing trailing entries become '' instead of
    # raising IndexError and aborting the whole run.
    rows = []
    for idx, name in enumerate(names):
        price = _text_at(prices, idx).replace('¥', '').replace('起', '')
        print(price)
        if price == '免费':
            price = '0'
            sold = '0'
        else:
            sold = _text_at(sales, idx)
        address = _text_at(addresses, idx).replace('地址:', '')
        rows.append([name.text, address, price, sold, _text_at(notes, idx)])
    return rows


def main():
    """Write the header line, then append the rows of every result page."""
    with open(CSV_PATH, 'a+', newline='', encoding='utf-8') as f:
        writer = csv.writer(f)
        writer.writerow(CSV_HEADER)
        for page in PAGES:
            writer.writerows(extract_rows(fetch_page(page)))


if __name__ == '__main__':
    main()
原文地址:https://www.cnblogs.com/persistence-ok/p/10982403.html
时间: 2024-10-09 00:35:55