I just got started with Python and learned how to scrape simple web pages with the requests module, so I wrote a crawler that downloads the champion skins from the official LOL site. Here is the code.
# Steps: fetch the hero JSON, get the hero ID list, assemble the skin URLs, download the skins
# Imports: requests and re
import requests
import re
import time


def Download_LOL_Skin():
    # Hero info JSON: https://lol.qq.com/biz/hero/champion.js
    # Fetch the hero info list
    json_url = "https://lol.qq.com/biz/hero/champion.js"
    html_re = requests.get(json_url).content
    html_str = html_re.decode()
    pat_js = r'"keys":(.*?),"data"'
    enc = re.compile(pat_js)
    html_list = enc.findall(html_str)
    dict_js = eval(html_list[0])  # dict mapping hero ID -> hero name

    # Assemble the skin download URLs
    # Example URL: https://ossweb-img.qq.com/images/lol/web201310/skin/big266000.jpg
    download_url = []
    for key in dict_js:
        for i in range(15):
            # Pad the skin index to three digits: big<heroID><skin index 000-014>.jpg
            hero_str = str(i)
            if len(hero_str) == 1:
                num = '00'
            elif len(hero_str) == 2:
                num = '0'
            numstr = key + num + hero_str
            hero_download_url = 'https://ossweb-img.qq.com/images/lol/web201310/skin/big' + numstr + '.jpg'
            download_url.append(hero_download_url)

    # Assemble the save paths and file names
    # Save directory: /home/runaway/桌面/LOL_Skin
    # Hero name list
    file_path_list = []
    path = '/home/runaway/桌面/LOL_Skin/'
    for name in dict_js.values():
        for i in range(15):
            file_path = path + name + str(i) + '.jpg'
            file_path_list.append(file_path)

    # Download and save the skin files
    n = 0
    for i in range(len(download_url)):
        res = requests.get(download_url[i], verify=False)
        if res.status_code == 200:
            # time.sleep(1)
            with open(file_path_list[i], "wb") as f:
                f.write(res.content)
            n = n + 1
            print(download_url[i] + " image " + str(n) + " downloaded")
    print("Downloaded " + str(n) + " images in total")


if __name__ == '__main__':
    start = time.time()
    Download_LOL_Skin()
    end = time.time()
    print("Total time: " + str(end - start) + " seconds")
The script includes a timer and an image counter; if you want to use the code, remember to change the save path to one on your own machine.
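When changing the save path, the directory also has to exist before the downloads start. A minimal sketch (the directory name here is just an example, not part of the original script) could create it automatically with os.makedirs:

import os

save_dir = '/home/runaway/桌面/LOL_Skin/'  # example path: change this to your own directory
os.makedirs(save_dir, exist_ok=True)       # create the directory if it does not already exist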
Please follow my blog for more Python projects and source code. Thanks!
Original post: https://www.cnblogs.com/Runaway11/p/10398503.html