# coding=utf-8
"""Fetch a weather.com.cn forecast page, extract its entries and log them.

Each extracted entry is printed and appended to a text file as one JSON
object per line.
"""
import json
import re
from typing import Dict, Iterator, Optional

# Compiled once at import time instead of on every parse_data() call.
# Written as raw strings so that ``\d`` is a regex escape rather than an
# invalid string escape.  Capture groups, in order: the <h1> text, the text
# of the ``wea`` paragraph, the digits inside the <span>, and the contents of
# the next two <i> elements.
_FORECAST_PATTERN = re.compile(
    r'<li.*?skyid.*?h1>(.*?)</h1>.*?wea">(.*?)</p>.*?pan>(\d+)</span>'
    r'.*?i>(.*?)</i>.*?i>(.*?)</i>.*?</li>',
    re.S,
)


def get_data(url: str, timeout: float = 10) -> Optional[str]:
    """Download *url* and return its body decoded as UTF-8.

    Args:
        url: Address of the page to download.
        timeout: Seconds to wait for the server before giving up.  Without
            a timeout ``requests`` may block forever on a stalled server.

    Returns:
        The response text, or ``None`` when the HTTP status is not 200.

    Raises:
        requests.RequestException: On connection failures or timeouts.
    """
    # Imported here so the parsing/writing helpers stay importable on
    # machines where the third-party ``requests`` package is not installed.
    import requests

    response = requests.get(url, timeout=timeout)
    if response.status_code != 200:
        return None
    # Force UTF-8 rather than trusting the charset guessed from the headers.
    response.encoding = 'utf-8'
    return response.text


def parse_data(html: Optional[str]) -> Iterator[Dict[str, str]]:
    """Yield one dict per forecast ``<li>`` entry found in *html*.

    Args:
        html: Page source as returned by :func:`get_data`.  ``None`` or an
            empty string yields nothing instead of raising ``TypeError``.

    Yields:
        Dicts with the keys ``data``, ``weather``, ``T`` and ``wind``.
        ``T`` joins the <span> digits and the first <i> text with ``/``.
    """
    if not html:
        return
    for heading, weather, span_digits, first_i, second_i in (
            _FORECAST_PATTERN.findall(html)):
        yield {
            # NOTE(review): 'data' presumably means 'date'; the key is kept
            # unchanged because it is part of the file format already written.
            'data': heading,
            'weather': weather,
            'T': span_digits + '/' + first_i,
            'wind': second_i,
        }


def write_to_file(content, path: str = 'result.txt') -> None:
    """Append *content* to *path* as a single JSON line.

    Args:
        content: Any JSON-serialisable object.
        path: Destination file; created if missing, always appended to.
    """
    # The ``with`` block closes the file; no explicit close() is needed.
    with open(path, 'a', encoding='utf-8') as f:
        # ensure_ascii=False keeps non-ASCII text readable in the output.
        f.write(json.dumps(content, ensure_ascii=False) + '\n')


def main() -> None:
    """Fetch the forecast page, then print and store every parsed entry."""
    url = 'http://www.weather.com.cn/weather/101280601.shtml'
    html = get_data(url)
    if html is None:
        # Fail with a clear message instead of a TypeError inside the parser.
        raise SystemExit('Failed to fetch ' + url)
    for item in parse_data(html):
        print(item)
        write_to_file(item)


if __name__ == '__main__':
    main()
# 原文地址:https://www.cnblogs.com/jp1021/p/9876770.html
# 时间: 2024-11-11 21:26:02