import pandas as pd
from datetime import datetime
from math import ceil #向上取整
from pandas import DataFrame
def build_rfm(data, reference_date, days_per_month=30):
    """Build a per-user RFM (recency, frequency, monetary) table.

    Args:
        data: DataFrame holding the columns '最近投资时间' (most recent
            investment time) and '首次投资时间' (first investment time) as
            datetimes, plus the numeric columns '投标总次数' (total number of
            bids) and '总投资金' (total amount invested).
        reference_date: The "as of" datetime that elapsed time is measured to.
        days_per_month: Length in days of the month bucket used to convert
            the time since the first investment into whole months.

    Returns:
        DataFrame indexed like ``data`` with columns:
            R - whole days from the most recent investment to reference_date
            F - total bids divided by the number of months since the first
                investment (average bids per month)
            M - total invested amount divided by the same number of months
                (average amount per month)
    """
    # Recency: whole days elapsed since the latest investment.
    recency_days = (reference_date - data['最近投资时间']).dt.days

    # Tenure: days since the first investment, rounded UP to whole months so
    # that a partially elapsed month still counts as one month.
    tenure_days = (reference_date - data['首次投资时间']).dt.days
    tenure_months = tenure_days.map(lambda days: ceil(days / days_per_month))
    # NOTE(review): a first investment made exactly on reference_date gives
    # zero months, so F and M become inf/NaN for that row - confirm whether
    # such rows can occur and how they should be treated.

    frequency = data['投标总次数'] / tenure_months
    monetary = data['总投资金'] / tenure_months

    # Plain arrays + explicit index: positional assembly, so a non-unique
    # user index cannot trigger label alignment.
    return DataFrame(
        {
            'R': recency_days.values,
            'F': frequency.values,
            'M': monetary.values,
        },
        index=data.index,
    )


if __name__ == "__main__":
    # Excel workbooks carry their own text encoding, and read_excel takes no
    # `encoding` argument, so none is passed.
    data = pd.read_excel('./data/data5.xlsx', index_col='用户')
    print(data.head())

    # Reference date against which recency and tenure are measured.
    exdata_date = datetime(2016, 8, 10)
    print(exdata_date)

    # Build R (recency), F (frequency) and M (monetary) for every user.
    cdata = build_rfm(data, exdata_date)

    # F: average number of bids per month.
    F = cdata['F'].values
    print(F)
# Original article: https://www.cnblogs.com/tiankong-blue/p/11623382.html