import os
import re
import urllib.request

import gevent

# NOTE(review): gevent's monkey patching is not applied anywhere in this
# script, so urllib's blocking sockets do not yield to other greenlets and the
# downloads effectively run one after another. Enabling real concurrency would
# need `from gevent import monkey; monkey.patch_all()` before the other
# imports; that is a process-wide side effect, so confirm it is wanted before
# adding it.

# Browser-like User-Agent sent with every image request.
_HEADERS = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.106 Safari/537.36"}


def download(image_download, images_path, i):
    """Fetch one image and save it as ``<image_download>/<i>.jpg``.

    Args:
        image_download: Directory the image is written into; it is created
            if it does not exist yet.
        images_path: URL of the image to fetch.
        i: Value used as the file name stem (``str(i) + ".jpg"``).

    Raises:
        urllib.error.URLError: If the request fails.
        OSError: If the directory or the file cannot be written.
    """
    req = urllib.request.Request(url=images_path, headers=_HEADERS)
    # Context manager closes the HTTP response even if read() raises.
    with urllib.request.urlopen(req) as response:
        image_content = response.read()

    image_name = image_download + "/" + str(i) + ".jpg"
    # The original assumed the directory already existed and failed with
    # FileNotFoundError otherwise; exist_ok keeps concurrent calls safe.
    os.makedirs(os.path.dirname(image_name), exist_ok=True)
    with open(image_name, "wb") as file:
        file.write(image_content)


def main():
    """Download every ``.jpg`` URL found in ``douyu.html``.

    Reads ``douyu.html`` from the current working directory, extracts all
    ``https://...jpg`` links, and spawns one greenlet per link that saves the
    image into ``images_douyu`` under a 1-based sequential file name.
    """
    with open("douyu.html", "r", encoding="utf-8") as f:
        messages = f.read()

    images_path = re.findall(r"https://.*?\.jpg", messages)
    directory = "images_douyu"

    # Numbering starts at 1 to match the original file names (1.jpg, 2.jpg...).
    jobs = [
        gevent.spawn(download, directory, image_path, i)
        for i, image_path in enumerate(images_path, start=1)
    ]
    gevent.joinall(jobs)


if __name__ == "__main__":
    main()
原文地址:https://www.cnblogs.com/Lclog/p/9657927.html
时间: 2024-10-16 00:03:09