1.读入数据
# -*- coding: utf-8 -*-
import numpy as np
import pandas as pd

# Load a tab-separated file into a DataFrame with explicit column names.
# `header=None` tells pandas the file has no header row, so the first line is
# kept as data and `names` supplies the column labels. A boolean `header` is
# rejected by current pandas versions.
# NOTE(review): assumes the file has no header line; use header=0 if it does.
# Reference: http://pandas.pydata.org/pandas-docs/stable/generated/pandas.read_csv.html
df_data = pd.read_csv(
    'filename',
    sep='\t',
    header=None,
    names=['distance', 'pid', 'time', 'fee'],
)
2.ix(pandas 1.0 起已移除,请改用 loc)
# Set `discount` only on the rows where `carpool` equals 1; other rows are
# left untouched. `.loc` replaces the `.ix` indexer, which was removed in
# pandas 1.0. The right-hand side is aligned to the selected rows by index.
carpool_mask = df_data['carpool'] == 1
df_data.loc[carpool_mask, 'discount'] = (
    1 - df_data['discount_fee'] / df_data['total_fee']
)

# Derive one value per row from the start latitude/longitude pair.
# The coordinates are read by column label rather than by position, because
# integer lookups on a label-indexed row are deprecated in pandas.
# NOTE(review): `get_HexCellKey` is defined elsewhere; presumably it maps
# (lat, lng) to a hex-grid cell key -- confirm against its definition.
df_sofa['start_gird'] = df_sofa[['start_lat', 'start_lng']].apply(
    lambda row: get_HexCellKey(row['start_lat'], row['start_lng']),
    axis=1,
)
3.DataFrame
# Split every `bubble_time` string on a single space and build a two-column
# frame ('day', 'h') that shares the index of `data`.
# Each value must split into exactly two pieces to match the two column names.
# NOTE(review): presumably values look like "<date> <time>" -- confirm.
time_split = pd.DataFrame(
    [value.split(' ') for value in data.bubble_time],
    index=data.index,
    columns=['day', 'h'],
)
4.cut
# Bucket `total_fee` into five labelled price bands.
# Six bin edges define five intervals; with pd.cut's default right=True they
# are (0, 30], (30, 40], (40, 60], (60, 80], (80, 400]. Values outside
# (0, 400] -- including exactly 0 -- become NaN.
bins_p = [0, 30, 40, 60, 80, 400]
# One label per interval, in the same order as the intervals above.
group_names_p = ['10', '30', '40', '60', '80']
data['price'] = pd.cut(data['total_fee'], bins=bins_p, labels=group_names_p)
5.sort_values
# Sort rows by `pid`, breaking ties by `time`, both ascending.
# sort_values returns a new DataFrame, so the result is rebound to `data`.
data = data.sort_values(by=['pid', 'time'], ascending=True)
时间: 2024-11-10 11:27:38