# -*- coding: utf-8 -*-
"""Basic descriptive statistics on plain Python sequences.

Provides the mean, expectation, median, mode, range, population and sample
variance, standard deviation, covariance and Pearson correlation.

The public function names (including the misspelled ``standardsocre``) are
kept as they are so existing callers keep working.
"""
import itertools
import math


def Mean(t):
    """Return the arithmetic mean of the numbers in ``t`` as a float.

    Raises:
        ZeroDivisionError: if ``t`` is empty.
    """
    return float(sum(t)) / len(t)


def E(x, p):
    """Return the expectation of a discrete random variable.

    The expectation is the sum of every value ``x[i]`` multiplied by its
    probability ``p[i]``.

    Args:
        x: sequence of values the variable can take.
        p: sequence of probabilities, indexed in parallel with ``x``.

    Raises:
        IndexError: if ``p`` has fewer entries than ``x``.
    """
    # p is indexed (not zipped) so a too-short p still raises IndexError
    # instead of silently truncating the sum.
    return sum(value * p[i] for i, value in enumerate(x))


def Median(t):
    """Return the median of ``t``, or ``None`` when ``t`` is empty.

    For an odd number of items the middle element itself is returned; for
    an even number the mean of the two middle elements is returned.
    """
    arr = sorted(t)
    if not arr:
        return None
    # Integer arithmetic keeps the odd/even decision independent of how the
    # interpreter implements "/" (floor division on Python 2, true division
    # on Python 3).
    mid, is_odd = divmod(len(arr), 2)
    if is_odd:
        return arr[mid]
    return Mean(arr[mid - 1:mid + 1])


def Mode(t):
    """Return the list of most frequent values in ``t``.

    The modes are listed in ascending order.

    Returns:
        ``None`` when ``t`` is empty or when no value occurs more than once,
        otherwise a list with every value that has the highest count.
    """
    if not t:
        return None
    freq = __getfreq(t)
    top_count = freq[0][0]
    if top_count == 1:
        # Every value is unique, so there is no mode.
        return None
    # freq is ordered by descending count, so the modes are its leading run.
    leaders = itertools.takewhile(lambda pair: pair[0] == top_count, freq)
    return [value for _, value in leaders]


def __getfreq(t):
    """Return ``(count, value)`` pairs for ``t``, most frequent first.

    Pairs that share a count stay in ascending order of value, because the
    values are grouped from a sorted copy and the final sort is stable.
    """
    freq = [(len(list(group)), value)
            for value, group in itertools.groupby(sorted(t))]
    freq.sort(key=lambda pair: pair[0], reverse=True)
    return freq


def Var(t, mu=None):
    """Return the population variance of ``t``.

    Args:
        t: sequence of numbers.
        mu: precomputed mean of ``t``; computed with ``Mean`` when omitted.
    """
    if mu is None:
        mu = Mean(t)
    # Variance is the mean of the squared deviations from the mean.
    return Mean([(x - mu) ** 2 for x in t])


def D(x, p):
    """Return the variance of a discrete random variable.

    By definition this is the expectation of the squared deviation from the
    expectation, ``E((X - E(X)) ** 2)``.  It is computed here with the
    equivalent shortcut ``E(X ** 2) - E(X) ** 2``.

    Args:
        x: sequence of values the variable can take.
        p: sequence of probabilities, indexed in parallel with ``x``.
    """
    mean = E(x, p)
    mean_of_squares = E([value ** 2 for value in x], p)
    return mean_of_squares - mean ** 2


def SVar(t):
    """Return the sample variance of ``t`` (divisor ``n - 1``).

    Returns:
        ``None`` when ``t`` has fewer than two items, because the sample
        variance is undefined there (the divisor would be zero).
    """
    if not t or len(t) < 2:
        return None
    mu = Mean(t)
    return sum((x - mu) ** 2 for x in t) / (len(t) - 1)


def MeanVar(t):
    """Return the pair ``(mean, population variance)`` of ``t``."""
    mu = Mean(t)
    return mu, Var(t, mu)


def StdVar(t, mu=None):
    """Return the population standard deviation of ``t``.

    Args:
        t: sequence of numbers.
        mu: precomputed mean of ``t``; computed with ``Mean`` when omitted.
    """
    if mu is None:
        mu = Mean(t)
    return math.sqrt(Var(t, mu))


def Range(t):
    """Return ``max(t) - min(t)``, or ``None`` when ``t`` is empty."""
    if not t:
        return None
    return max(t) - min(t)


def Cov(X, Y):
    """Return the covariance of ``X`` and ``Y``.

    The covariance is the mean of the products of the paired deviations
    from the mean.  The product is positive when both deviations have the
    same sign, i.e. when the two series move in the same direction.

    The result carries the product of both units (for example cm * kg),
    which is hard to interpret; ``pearson_correlation`` removes the units
    by working on standard scores instead.

    Pairs are formed with ``zip`` and the sum is divided by ``len(X)``.
    """
    mu_x = Mean(X)
    mu_y = Mean(Y)
    total = sum((x - mu_x) * (y - mu_y) for x, y in zip(X, Y))
    return total / len(X)


def standardsocre(x, mu, sigma):
    """Return the standard score ``(x - mu) / sigma``.

    ``x - mu`` is the deviation from the mean; dividing by the standard
    deviation normalises it, so the transformed variable is unit-free with
    mean 0 and variance 1.

    Raises:
        ZeroDivisionError: if ``sigma`` is zero.
    """
    return (x - mu) / sigma


# Correctly spelled alias; the original misspelled name stays available.
standardscore = standardsocre


def pearson_correlation(X, Y):
    """Return the Pearson correlation coefficient of ``X`` and ``Y``.

    Both series are converted to standard scores and the mean of the
    products of the paired scores is returned.  This equals
    ``Cov(X, Y) / (StdVar(X) * StdVar(Y))``.

    The coefficient is unit-free: 1 means perfectly correlated (one value
    predicts the other exactly), -1 means perfectly negatively correlated.

    Raises:
        ZeroDivisionError: if either series has zero standard deviation.
        IndexError: if ``Y`` has fewer entries than ``X``.
    """
    mu_x = Mean(X)
    mu_y = Mean(Y)
    # Reuse the means so StdVar does not compute them a second time.
    sigma_x = StdVar(X, mu_x)
    sigma_y = StdVar(Y, mu_y)
    products = [
        standardsocre(x, mu_x, sigma_x) * standardsocre(Y[i], mu_y, sigma_y)
        for i, x in enumerate(X)
    ]
    return Mean(products)


def _show(label, value):
    """Print ``label`` and ``value`` separated by a single space."""
    # One pre-formatted argument prints the same on Python 2 and Python 3.
    print('%s %s' % (label, value))


if __name__ == '__main__':
    astr = '93 62 51 93 75 82 93 62 65 51'
    alist = [int(e) for e in astr.split()]
    _show('均值:', Mean(alist))
    _show('中位数:', Median(alist))
    _show('众数:', Mode(alist))
    _show('极差:', Range(alist))
    _show('总体方差:', Var(alist))
    _show('样本方差', SVar(alist))
    _show('标准差:', StdVar(alist))
    print('验证协方差 Cov(X, X) == Var(X)')
    print(Cov(alist, alist))
    print(Var(alist))
    print('协方差与方差相等,说明公式编写的函数是正确的')
# 基本概念: 均值,期望,中位数,众数,极差,总体方差,样本方差,协方差
# 时间: 2024-10-13 00:38:33