import requests  # HTTP request library
from bs4 import BeautifulSoup  # HTML parsing library
def get_girl(url):
    # Forge the request headers
    headers = {
        # User agent
        'User-Agent': "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.1 (KHTML, like Gecko) Chrome/22.0.1207.1 Safari/537.1",
        # URL of the previous page
        'Referer': 'https://www.mzitu.com/',  # This matters: many anti-scraping checks validate this field
    }
    html = requests.get(url, headers=headers)  # Send the request
    all_list = BeautifulSoup(html.text, 'lxml').find('ul', id='pin').find_all('li')  # Parse the page content
    for i in all_list:
        girl_title = i.get_text()  # Get the image title
        girl_url = i.find('img')['data-original']  # Get the image URL
        response = requests.get(girl_url, headers=headers)  # Download the image
        file_name = girl_title + ".jpg"  # Build the image file name
        print("Saving image file: " + file_name)
        with open(file_name, "wb") as f:  # Write the image to disk
            f.write(response.content)
if __name__ == "__main__":
    for page in range(1, 21):  # Download the first 20 pages
        url = 'https://www.mzitu.com/mm/page/%s' % page  # Build each page's URL
        get_girl(url)  # Fetch the images on that page
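The script above assumes every title makes a valid file name and every request succeeds, which is fragile in practice. Below is a minimal hardening sketch for the download step; the sanitize_filename helper, the timeout value, and the one-second delay are my own additions, not part of the original post:

import re
import time
import requests

def sanitize_filename(title):
    # Strip characters that are illegal in Windows/Unix file names
    return re.sub(r'[\\/:*?"<>|]', '_', title).strip() or "untitled"

def download_image(girl_url, girl_title, headers):
    # Hypothetical wrapper around the download step with basic error handling
    try:
        response = requests.get(girl_url, headers=headers, timeout=10)
        response.raise_for_status()  # Raise on 4xx/5xx responses
    except requests.RequestException as e:
        print("Failed to download %s: %s" % (girl_url, e))
        return
    file_name = sanitize_filename(girl_title) + ".jpg"
    with open(file_name, "wb") as f:
        f.write(response.content)
    time.sleep(1)  # Be polite: pause between requests to avoid tripping rate limits

Calling download_image in place of the four download-and-save lines inside the loop leaves the rest of the script unchanged.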
Original post: https://www.cnblogs.com/yanhonghong/p/11681885.html