"""Small weather-forecast scraper (originally written for Python 3.5 on macOS).

Downloads a 15-day forecast page from weather.com.cn, extracts one row per
listed day and appends the rows to a CSV file.
"""

import csv
import http.client
import random
import socket
import time

import requests
from bs4 import BeautifulSoup

# Browser-like request headers sent with every page request.
# "sdch" is intentionally not advertised in Accept-Encoding; this assumes
# Requests would not decode an sdch-encoded response body.
REQUEST_HEADERS = {
    'Accept': ('text/html,application/xhtml+xml,application/xml;q=0.9,'
               'image/webp,*/*;q=0.8'),
    'Accept-Encoding': 'gzip, deflate',
    'Accept-Language': 'zh-CN,zh;q=0.8',
    'Connection': 'keep-alive',
    'User-Agent': ('Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_6) '
                   'AppleWebKit/537.36 (KHTML, like Gecko) '
                   'Chrome/50.0.2661.102 Safari/537.36'),
}


def get_content(url, data=None, max_retries=5):
    """Download ``url`` and return the response body decoded as UTF-8.

    Transient network failures are retried after a random pause; the pause
    range depends on the kind of failure.

    Args:
        url: Address of the page to fetch.
        data: Unused; kept so existing callers that pass it keep working.
        max_retries: Maximum number of attempts (values below 1 are treated
            as 1).

    Returns:
        The page text.

    Raises:
        requests.exceptions.HTTPError: The server answered with an error
            status (4xx/5xx).
        requests.exceptions.RequestException: A non-retryable request error
            (for example an invalid URL), or the last connection/timeout
            error once every attempt has failed.
        OSError, http.client.HTTPException: The last low-level error once
            every attempt has failed.
    """
    timeout = random.choice(range(80, 180))
    attempts = max(1, max_retries)
    last_error = None

    for attempt in range(attempts):
        try:
            rep = requests.get(url, headers=REQUEST_HEADERS, timeout=timeout)
        except (requests.exceptions.Timeout, socket.timeout) as e:
            print('3:', e)
            last_error = e
            pause = random.choice(range(8, 15))
        except http.client.BadStatusLine as e:
            print('5:', e)
            last_error = e
            pause = random.choice(range(30, 80))
        except http.client.ImproperConnectionState as e:
            print('6:', e)
            last_error = e
            pause = random.choice(range(5, 15))
        except socket.error as e:
            # socket.error is OSError, which every Requests exception derives
            # from. Only connection-level problems are worth retrying;
            # anything else (bad URL, too many redirects, ...) is permanent.
            if (isinstance(e, requests.exceptions.RequestException)
                    and not isinstance(e, requests.exceptions.ConnectionError)):
                raise
            print('4:', e)
            last_error = e
            pause = random.choice(range(20, 60))
        else:
            # Fail loudly on 4xx/5xx instead of handing an error page to the
            # parser.
            rep.raise_for_status()
            rep.encoding = 'utf-8'
            return rep.text

        # Do not sleep after the final failed attempt.
        if attempt + 1 < attempts:
            time.sleep(pause)

    raise last_error


def get_data(html_text):
    """Extract the forecast rows from the page HTML.

    The rows are read from the ``<li>`` items of the first ``<ul>`` inside
    ``<div id="15d">``. For each item the text of its first five ``<span>``
    elements is collected, in page order (named date, weather, temperature,
    wind and wind1 in the original code).

    Args:
        html_text: HTML source of the forecast page.

    Returns:
        A list of rows, each a list of five strings.

    Raises:
        ValueError: The page does not contain the expected forecast list.
    """
    soup = BeautifulSoup(html_text, 'html.parser')
    container = soup.find('div', id='15d')
    forecast_list = container.find('ul') if container is not None else None
    if forecast_list is None:
        raise ValueError('forecast list (div#15d > ul) not found in page')

    rows = []
    for day in forecast_list.find_all('li'):
        spans = day.find_all('span')
        if len(spans) < 5:
            # Not a complete forecast entry; skip it rather than crash.
            continue
        # get_text() also works for spans that contain nested tags, where
        # .string would be None.
        rows.append([span.get_text() for span in spans[:5]])
    return rows


def write_data(data, name, encoding='utf-8'):
    """Append ``data`` to the CSV file ``name``.

    Args:
        data: Iterable of rows (each an iterable of cell values).
        name: Path of the CSV file; it is created if missing and appended to
            otherwise.
        encoding: Text encoding of the file. An explicit default keeps the
            output independent of the platform's locale encoding.
    """
    # newline='' lets the csv module control line endings itself.
    with open(name, 'a', encoding=encoding, errors='ignore', newline='') as f:
        csv.writer(f).writerows(data)


def main():
    """Fetch the forecast page and append its rows to ``content.csv``."""
    url = 'http://www.weather.com.cn/weather15d/101270101.shtml'
    html = get_content(url)
    result = get_data(html)
    write_data(result, 'content.csv')


if __name__ == '__main__':
    main()
时间: 2024-10-24 14:23:57