pca_python

#python
from numpy import *
def loadData(filename):
    """Load a tab-separated text file of numbers into a numpy matrix.

    Each non-blank line of the file becomes one row. A line is stripped of
    surrounding whitespace, split on tab characters, and every field is
    converted with ``float``.

    Args:
        filename: Path of the text file to read.

    Returns:
        A ``numpy.matrix`` holding one row per non-blank line.

    Raises:
        ValueError: If a field cannot be converted to ``float``.
    """
    rows = []
    # The context manager closes the file even when parsing raises.
    with open(filename) as handle:
        for line in handle:
            stripped = line.strip()
            if not stripped:
                # Tolerate blank lines (e.g. a trailing newline at EOF)
                # instead of failing on float('').
                continue
            # Build a concrete list: on Python 3 ``map`` is lazy and would
            # end up as an opaque object inside the matrix.
            rows.append([float(field) for field in stripped.split("\t")])
    # ``asmatrix`` is what the old ``mat`` alias pointed to; the alias itself
    # is no longer available in NumPy 2.x.
    return asmatrix(rows)
def pca(datamat, k):
    """Project the samples onto their top-k principal components.

    The data is mean-centered column-wise, the sample covariance matrix
    (divisor ``rows - 1``) is formed, and the centered data is multiplied by
    the eigenvectors belonging to the ``k`` largest eigenvalues.

    Args:
        datamat: 2-D data with one sample per row and one feature per
            column. A ``numpy.matrix``, ndarray, or nested list is accepted.
        k: Number of principal components to keep.

    Returns:
        A ``numpy.matrix`` with one row per sample and ``k`` columns,
        ordered from the largest eigenvalue downwards. The sign of each
        column is arbitrary, as is usual for PCA.

    Note:
        The covariance divisor is ``rows - 1``, so a single-row input
        results in a division by zero.
    """
    # Coerce to a matrix so that ``*`` below is a matrix product even when
    # the caller hands in a plain ndarray or nested list.
    datamat = asmatrix(datamat)
    meanval = mean(datamat, 0)
    center = datamat - meanval
    covmat = (center.T * center) / (shape(datamat)[0] - 1)
    # The covariance matrix is symmetric: ``eigh`` is the dedicated solver
    # and always yields real eigenvalues/eigenvectors, whereas the general
    # ``eig`` may return complex values with round-off imaginary parts.
    eigval, eigvec = linalg.eigh(covmat)
    # Indices of the k largest eigenvalues, largest first.
    order = argsort(eigval)[::-1][:k]
    components = eigvec[:, order]
    return center * components
# Demo: load the tab-separated samples from testSet.txt, reduce them to a
# single principal component and print the projected matrix.
datamat=loadData("testSet.txt")
lowmat=pca(datamat,1)
# Function-call form of print works on both Python 2 and Python 3.
print(lowmat)
%matlab
% PCA of the samples in testSet.txt, reduced to one dimension.
data=vpa(load('testSet.txt'),10);% load the samples as 10-digit variable-precision values
[m,n]=size(data);
meanval=data-repmat(mean(data),m,1);% centered data (despite the name); repmat is the counterpart of numpy's tile
calccov=(meanval'*meanval)./(m-1);% compute the covariance matrix
[eigvec,eigval]=eig(calccov);% eigenvectors and eigenvalues of the covariance matrix (eigval is a diagonal matrix)
[val,ind]=sort(diag(eigval),'descend');% sort the eigenvalues themselves (the diagonal), largest first; keep the top N with the largest share
eigvecmatrix=eigvec(:,ind(1));% keep one eigenvector: reduce to one dimension
lowmatrix=meanval*eigvecmatrix;% sample matrix after dimensionality reduction
lowmatrix=vpa(lowmatrix,10);

  

时间: 2024-10-19 10:51:23

pca_python的相关文章