"""请求网页""" import requests import re import time import os headers={ ‘user-agent‘:‘Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.26 Safari/537‘ } response=requests.get(‘https://www.vmgirls.com/12985.html‘,headers=headers) #print(response.request.headers) #print(response.text) html=response.text """解析网页""" dir_name=re.findall(‘<h1 class="post-title h3">(.*?)</h1>‘,html)[-1]#文件 if not os.path.exists(dir_name):#检查文件 os.mkdir(dir_name) urls=re.findall(‘<a href="(.*?)" alt=".*?" title=".*?">‘,html) print(urls) """保存图片""" for url in urls: time.sleep(1) #图片的名字 file_name=url.split(‘/‘)[-1] response = requests.get(url, headers=headers) with open(dir_name+‘/‘+file_name,‘wb‘) as f: f.write(response.content
# Original article: https://www.cnblogs.com/liujinxin123/p/12404308.html
# Time: 2024-10-09 04:49:09