"""Download the images found in the threads of a Baidu Tieba forum listing."""
import os
from urllib.parse import urljoin, urlsplit

import requests
from lxml import etree


class TieBa:
    """Fetch a Tieba forum listing for a keyword and save the thread images.

    Args:
        query_string: Forum keyword, sent as the ``kw`` query parameter.
        save_dir: Directory that receives the downloaded images.
        timeout: Seconds to wait on each HTTP request before giving up.
    """

    def __init__(self, query_string, save_dir="D:/img/", timeout=10):
        self.query_string = query_string
        self.base_url = 'https://tieba.baidu.com/f'
        self.save_dir = save_dir
        self.timeout = timeout
        self.headers = {
            "User-Agent": "Mozilla/5.0 (Windows NT 6.1; WOW64) "
                          "AppleWebKit/537.36 (KHTML, like Gecko) "
        }

    def params(self):
        """Return the query parameters for the forum listing request."""
        return {"kw": self.query_string}

    def send_request(self, url, parms=None):
        """GET *url* and return the raw response body as bytes.

        Args:
            url: Absolute URL to fetch.
            parms: Optional mapping of query-string parameters.

        Raises:
            requests.RequestException: On connection errors, timeouts, or a
                4xx/5xx status (so error pages are never treated as data).
        """
        # ``None`` default instead of ``{}``: a mutable default would be
        # shared between every call.
        response = requests.get(
            url,
            params=parms,
            headers=self.headers,
            timeout=self.timeout,
        )
        response.raise_for_status()
        return response.content

    # 2. Convert the raw data and extract values from it
    def parse_data(self, data, rule):
        """Parse *data* as HTML and return the results of XPath *rule*.

        Returns an empty list when there is nothing to parse.
        """
        if not data:
            return []
        html_data = etree.HTML(data)
        # Guard so that .xpath is never called on a missing tree.
        if html_data is None:
            return []
        return html_data.xpath(rule)

    # 3. Save the data
    def save_data(self, data, name):
        """Write the bytes *data* to ``save_dir``/*name*, printing *name*."""
        print(name)
        if self.save_dir:
            # Create the target directory on demand so a missing folder does
            # not abort the download.
            os.makedirs(self.save_dir, exist_ok=True)
        image_path = os.path.join(self.save_dir, name)
        with open(image_path, 'wb') as f:
            f.write(data)

    @staticmethod
    def _image_name(image_url):
        """Derive a file name (at most 12 characters) from *image_url*.

        Only the last path segment is used, so query strings and ``/`` never
        end up in the file name. May return an empty string when the URL path
        has no final segment.
        """
        last_segment = urlsplit(image_url).path.rsplit('/', 1)[-1]
        return last_segment[-12:]

    # Main entry point
    def run(self):
        """Crawl the forum listing, then every linked thread, saving images.

        A failing thread or image request is reported and skipped so that one
        bad link does not stop the whole crawl. A failure of the initial
        listing request propagates to the caller.
        """
        tieba_params = self.params()
        listing_html = self.send_request(self.base_url, tieba_params)

        # XPath: links to the threads on the listing page
        detail_rule = '//div[@class="t_con cleafix"]/div/div/div/a/@href'
        # XPath: images inside a thread
        image_rule = '//img[@class="BDE_Image"]/@src'

        for label in self.parse_data(listing_html, detail_rule):
            # urljoin copes with relative, absolute and protocol-relative hrefs.
            thread_url = urljoin(self.base_url, label)
            try:
                detail_data = self.send_request(thread_url)
            except requests.RequestException as exc:
                print(f'skipping thread {thread_url}: {exc}')
                continue

            for image_src in self.parse_data(detail_data, image_rule):
                image_url = urljoin(thread_url, image_src)
                image_name = self._image_name(image_url)
                if not image_name:
                    # No usable file name can be derived from this URL.
                    continue
                try:
                    image_data = self.send_request(image_url)
                except requests.RequestException as exc:
                    print(f'skipping image {image_url}: {exc}')
                    continue
                # Save the image
                self.save_data(image_data, image_name)


if __name__ == '__main__':
    keyword = input('请先在D盘创建一个名为img的文件夹来接收图片\n'
                    '接下来请输入你要查询的关键字: ')
    tieba = TieBa(keyword)
    tieba.run()
# Original article: https://www.cnblogs.com/wshr210/p/11302299.html
# Time: 2024-10-11 07:18:27