"""Scrape the Maoyan Top-100 board, fetching each page in its own thread."""

import threading
import time

import parsel
import requests

# Browser-like request headers sent with every page request.
headers = {
    "Referer": "https://maoyan.com/board/4?offset=0",
    "User-Agent": (
        "Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 "
        "(KHTML, like Gecko) Chrome/80.0.3987.116 Safari/537.36"
    ),
}


def get_info(page):
    """Fetch one board page and print a dict for every ``<dd>`` entry on it.

    Args:
        page: Value substituted into the ``offset`` query parameter of the
            board URL.

    A failed request is reported on stdout and the function returns without
    printing any entries. Fields missing from an entry are printed as empty
    strings instead of raising ``IndexError``.
    """
    url = "https://maoyan.com/board/4?offset={}".format(page)
    try:
        # A timeout keeps a stalled connection from hanging the worker
        # thread (and therefore the final join) forever.
        response = requests.get(url, headers=headers, timeout=10)
    except requests.RequestException as exc:
        print("Request for offset {} failed: {}".format(page, exc))
        return

    sel = parsel.Selector(response.text)

    # Each <dd> element holds one entry of the board.
    for dd in sel.css("dd"):
        print({
            "电影名称": dd.css("p.name a::text").get(default=""),
            "主演": dd.css("p.star::text").get(default="").strip(),
            "上映时间": dd.css("p.releasetime::text").get(default=""),
            # The score is split across several <i> nodes; concatenate them.
            "评分": "".join(dd.css("p.score i::text").getall()),
        })


start_time = time.time()

# Request 10 pages (offsets 0, 10, ..., 90), one thread per page.
workers = []
for page in range(0, 100, 10):
    worker = threading.Thread(target=get_info, args=(page,))
    worker.start()
    workers.append(worker)

# Show the threads that are currently alive.
print("总线程数:", threading.enumerate())

# Block until every worker has finished; joining avoids spinning the CPU
# in a busy-wait loop while the requests are in flight.
for worker in workers:
    worker.join()

print("总用时:", time.time() - start_time, "s", sep=" ")
# 原文地址:https://www.cnblogs.com/panda009079/p/12348284.html
# 时间: 2024-10-09 05:38:18