import requests,re,pandas
from fake_useragent import UserAgent
# Landing page of the ranking site; downLoad() reads the per-region links from it.
url = 'http://www.laoyuegou.com/x/zh-cn/lol/lol/godrank.html?region=cn&area=1'

# Regex building blocks:
#   x - lazy filler that skips 1 to 600 arbitrary characters between two fields
#   y - capture group for a run of CJK ideographs (U+4E00 to U+9FA5)
x = '.{1,600}?'
y = '([一-龥]+)'

# One match per table row, yielding 11 captured strings in this order:
#   '#<digits>' after item1">, CJK text after server">, three integers (after
#   em>, span>, span>), a percentage after age">, the image name after score/,
#   the text after dan">, and three quoted CJK strings.
# NOTE(review): the three quoted strings are presumably the player's favourite
# heroes - confirm against the page markup.
# Raw f-strings keep the \d escapes literal; re.S lets the filler span newlines.
pattern = re.compile(
    rf'item1">(#\d+){x}server">{y}{x}em>(\d+){x}span>(\d+){x}span'
    rf'>(\d+){x}age">({x}%){x}score/({x}).png{x}dan">({x})<{x}"{y}"{x}"{y}"{x}"{y}"',
    re.S,
)
def downLoad():
    """Scrape the ranking pages of every region and append the rows to LOL.csv.

    Fetches the landing page at ``url``, extracts each region's link and
    Chinese name from it, then downloads pages 1-4 of every region, runs
    ``pattern`` over the HTML and appends the captured tuples to ``LOL.csv``
    (no header row, no index column). The file is written once per region.
    """
    # Build the generator once; ``.random`` is still read for every request.
    ua = UserAgent()
    landing = requests.get(url, headers={'User-Agent': ua.random}).text
    # Each hit is a (region link, region name) pair.
    area = re.findall('cn-li.*?(http.*?)".*?([一-龥]{2,9})', landing, re.S)
    for link, name in area:
        players = []
        for page in range(1, 5):
            h = {'User-Agent': ua.random}
            r = requests.get(link + f'&page={page}', headers=h).text
            print('下载%s战区的第%s页' % (name, page))
            # "+" only works between values of the same type, so tagging each
            # row with the region name needs a one-element tuple -- parentheses
            # plus a trailing comma, e.g. (name,):
            #     players.extend((name,) + row for row in pattern.findall(r))
            players.extend(pattern.findall(r))
        # Append mode: every region adds its rows to the same file.
        pandas.DataFrame(players).to_csv('LOL.csv', header=False, index=False, mode='a+')
# downLoad()
# *************** divider: the analysis / plotting script follows ***************
import pandas
import matplotlib as mpl
import matplotlib.pyplot as plt
# Plot styling. plt.rcParams and mpl.rcParams are the same object, so all
# settings are applied through one update call.
mpl.rcParams.update({
    'font.sans-serif': ['simhei'],  # font choice; fonts such as msyh cannot display Chinese
    'axes.labelsize': 16,
    'xtick.labelsize': 14,
    'ytick.labelsize': 14,
    'legend.fontsize': 12,
    'figure.figsize': [15, 6],
    'axes.unicode_minus': False,
})

# Raw string keeps the Windows backslashes literal (same value as before).
# The file path must not contain Chinese characters.
# NOTE(review): this reads a GBK-encoded CSV with a header row, whereas
# downLoad() writes LOL.csv with header=False and pandas' default encoding --
# presumably the file was edited by hand in between; confirm.
data = pandas.read_csv(r'E:\py\LOL.csv', encoding='gbk')

# Exploration snippets, kept for reference:
# print(data[50:60])      # print a slice of rows
# print(data.head(7))     # head() and tail() default to 5 records when no number is given
# print(data.ix[0:4, ['排名', '本命英雄1']])  # label slicing: unusually, the end row is included
#                                             # (.ix was removed in pandas 1.0; use .loc instead)
# print(data.describe())  # the 8 summary statistics of the integer-typed columns
# data['位置'].value_counts().drop('--').plot(kind='bar', rot=45, color='r')  # kind: chart type, rot: tick label rotation

# Count how often each hero appears among the first 7 rows of the three
# favourite-hero columns and draw the counts as a horizontal bar chart.
hero_columns = ('本命英雄1', '本命英雄2', '本命英雄3')
top_heroes = pandas.concat([data[column][:7] for column in hero_columns])
top_heroes.value_counts().plot(kind='barh', rot=0, color='y')
plt.show()
# **************************************** divider ****************************************
# 666