# -*- coding: utf-8 -*-
"""Pandas quick-start notes: Series/DataFrame creation, selection, and I/O.

Run as a script. It prints each example to stdout, writes one CSV file,
and round-trips a small structure through JSON.
"""
import json
from io import StringIO

import matplotlib.pyplot as plt  # noqa: F401  (unused here; kept from the original notes)
import numpy as np
import pandas as pd

# ---------------------------------------------------------------- Series
s = pd.Series([1, 3, 5, np.nan, 6, 8])
print(s)

# Second positional argument is the index.
s = pd.Series([1, 3], ["a", "b"])
print(s)

# ------------------------------------------------------------- DataFrame
# Six consecutive days starting 2013-01-01, used as the row index.
dates = pd.date_range("20130101", periods=6)
print(dates)

df = pd.DataFrame(np.random.randn(6, 4), index=dates, columns=list("ABCD"))

print("LIMIT:============")
print(df.head(3))
print(df.tail(3))

print("反转XY显示 :============")
print(df.T)

# Sorting (reference only):
#   df.sort_index(axis=1, ascending=False)
#   df.sort(columns='B')   # legacy spelling; newer pandas uses df.sort_values(by='B')

# ------------------------------------------------------------- Selectors
print("指定列 :============")
print(df[["A", "B"]])

print("df[行范围,列范围]:============")
print(df[0:3])

print("指定索引主键 :============")
print(df["20130103":"20130104"])

print("布尔过滤 :============")
print(df[df.A > 0.5])
# print(df[0:3,0:1])   # not valid on a DataFrame; df.iloc[0:3, 0:1] is the positional form

# ----------------------------------------------------- Matrix operations
print("距阵操作 :============")
print(df * 2)
print(np.exp(df))

# -------------------------------------------------------- Table elements
# df.values   -> data as a 2-D array
# df.columns  -> column labels
# df.index    -> row index
# df.dtypes   -> per-column dtypes

print("字典(JSON)转换距阵 :============")
sdata = {"Ohio": 35000, "Texas": 71000, "Oregon": 16000, "Utah": 5000}
df = pd.Series(sdata)
print(df)

sdata = [{"Ohio": 35000, "Texas": 71000, "Oregon": 16000, "Utah": 5000}]
# Select columns by key instead of assigning df.columns afterwards: a
# positional relabel attaches the wrong names to the values whenever the
# inferred column order differs from this list (e.g. sorted key order).
df = pd.DataFrame(sdata, columns=["Ohio", "Texas", "Oregon", "Utah"])
print(df)

# Replace every missing value with 0.
df = df.fillna(0)

# Getting started:
#   http://pandas.pydata.org/pandas-docs/stable/10min.html
#   http://pda.readthedocs.org/en/latest/chp5.html
# Blog:
#   http://cloga.info/#wat_e_eb3d32d8-f59a-4a08-bf96-6f706d89c097_zss_
# Big data:
#   http://www.17bigdata.com/?cat=22
# Optimization:
#   http://1.aisensiy.sinaapp.com/2014/03/%E6%9C%80%E8%BF%91%E4%BD%BF%E7%94%A8-pandas-%E7%9A%84%E6%80%BB%E7%BB%93/

# NOTE(review): hard-coded Windows path from the original notes; adjust it
# for the machine this runs on.
df.to_csv("E:\\py\\foo.csv")

# ------------------------------------------------------------------ JSON
print("josn====================")
j = [{"0": {"a": "a"}, "1": {"b": "b"}}]
elevations = json.dumps(j)
# Wrap the JSON text in a file-like object: passing a literal JSON string
# straight to read_json is deprecated in recent pandas releases.
df = pd.read_json(StringIO(elevations))
print(df)
# Big-data statistics notes
# Time: 2024-10-11 22:32:15