# Chinese text support: point seaborn at the SimHei font so CJK labels render.
# NOTE(review): `pd` and `FontProperties` are not imported anywhere in the
# visible source -- confirm they are imported before this point.
import seaborn as sns

myfont = FontProperties(fname=r'C:\Windows\Fonts\simhei.ttf', size=14)
sns.set(font=myfont.get_name())

# Read the workbook. The handle is opened in a `with` block so it is closed
# once the frame is loaded. `read_excel` has no `sep` argument (that belongs
# to the CSV readers), so it is not passed here.
# NOTE(review): ".xlsx" is a placeholder path left by the original listing.
with open(".xlsx", "rb") as file:
    data = pd.read_excel(file)

# Keep only rows whose '时间' value lies strictly between 0 and 4320.
data = data.loc[(data['时间'] > 0) & (data['时间'] < 4320)]

# Drop the columns that are not needed for the analysis.
# NOTE(review): the `...` is a literal Ellipsis standing in for column names
# the original listing elided -- replace it with the real column names.
drop_feat = ["编号", ..., "状态"]
feat = [i for i in data.columns if i not in drop_feat]
data = data[feat]

# Show the fraction of missing values per remaining column.
print(data.isnull().sum() / len(data))
# Steps so far: read the document, filter/fill the data, select the data.
# Keep only the rows whose '等级' is "Ⅲ" or "Ⅳ".
# (The original listing had this assignment commented out even though `ddie`
# is used by the statements that follow.)
ddie = data.loc[data['等级'].isin(["Ⅲ", "Ⅳ"])]

# Per ('时段', '分类') group: column means and row counts.
# `numeric_only=True` keeps text columns such as '等级' out of the mean;
# without it recent pandas versions raise instead of silently dropping them.
grouped = ddie.groupby(['时段', '分类'])
data = grouped.mean(numeric_only=True).reset_index()
dataForsize = grouped.size().reset_index()  # the count column is named 0

# Select the categories to plot.
# NOTE(review): the column names and match values below ('类', '', '统', '挂')
# look truncated in the original listing -- restore the real names before use.
dmean1 = data.loc[(data['类'] == '') | (data[''] == '')]
dsize1 = dataForsize.loc[(dataForsize['类'] == '统') | (dataForsize[''] == '')]
dmean1 = dmean1.groupby(['挂']).mean(numeric_only=True)  # mean per group
dsize1 = dsize1.groupby(['挂'])[0].sum()  # total count per group

# Tip: list(range(0, 24)) would give consecutive x coordinates; here the x
# values come from the group index instead, which may have gaps.
y1 = dmean1["时间"]
x1 = y1.index.to_numpy()  # public accessor instead of the private _index._data

plt.figure(figsize=(16, 5))
plt.plot(x1, y1, color='blue')
# NOTE(review): x3 / y3 are never defined in the visible source; the second
# series appears to have been elided from the original listing.
plt.plot(x3, y3, color='red')

# Annotate every point with its sample count. The count is looked up by the
# point's own index label, because the labels are not necessarily consecutive.
for _x, _y in zip(x1, y1):
    plt.text(_x, _y, dsize1.loc[_x], color='blue', fontsize=12)

plt.xticks(np.arange(24))
label = ["其它"]
plt.legend(label, loc=0, ncol=2)
plt.xlabel(" 0-23小时")
plt.ylabel("时长")
plt.show()
# Original article: https://www.cnblogs.com/-aye/p/8984484.html
# Time: 2024-11-13 10:38:42