1. K-Nearest Neighbors (KNeighborsClassifier)
Usage follows the same pattern as KMeans: construct the classifier, then fit it. The difference is that KMeans clustering is unsupervised learning while KNN is supervised learning, so the data must be split into a training set and a test set.
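For reference, a minimal sketch of such a split using train_test_split (the toy arrays here are invented purely for illustration):

from sklearn.model_selection import train_test_split

X = [[0], [1], [2], [3], [4], [5]]   # toy feature matrix, one feature per sample
y = [0, 0, 0, 1, 1, 1]               # toy labels

# hold out one third of the samples for testing; random_state fixes the shuffle
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.33, random_state=0)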
Now, straight to the KNN code.
from sklearn.neighbors import KNeighborsClassifier

X = [[0], [1], [2], [3]]  # samples (scikit-learn expects a 2D feature matrix)
Y = [0, 0, 1, 1]          # labels
neigh = KNeighborsClassifier(n_neighbors=3)  # use the 3 nearest points as neighbors
neigh.fit(X, Y)
neigh.predict([[1.1]])    # predict also takes a 2D array
K-NN can be understood like this: you have a pile of data whose classes are already known; when a new data point arrives, you compute its distance to every point in the training data, pick the K training points closest to it, look at which classes those points belong to, and then assign the new point to the majority class.
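To make the majority-vote idea concrete, here is a minimal from-scratch sketch in plain NumPy (not scikit-learn's actual implementation), using the same toy data as above:

import numpy as np
from collections import Counter

X = np.array([[0], [1], [2], [3]])  # known, labeled samples
Y = np.array([0, 0, 1, 1])

def knn_predict(x_new, k=3):
    # distance from the new point to every training point
    dists = np.abs(X - x_new).ravel()
    # indices of the k nearest training points
    nearest = np.argsort(dists)[:k]
    # majority vote among their labels
    return Counter(Y[nearest]).most_common(1)[0][0]

print(knn_predict(1.1))  # -> 0, matching the KNeighborsClassifier result above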
2. Decision Tree (DecisionTreeClassifier)
from sklearn.datasets import load_iris
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import cross_val_score  # cross-validation

clf = DecisionTreeClassifier()  # with default parameters, splits use Gini impurity
iris = load_iris()
cross_val_score(clf, iris.data, iris.target, cv=10)  # cv=10 means 10-fold cross-validation
from sklearn import tree

X = [[0, 0], [1, 1]]
Y = [0, 1]
clf = tree.DecisionTreeClassifier()
clf = clf.fit(X, Y)
clf.predict([[1.2, 1.2]])
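Once fitted, the learned splits can be inspected as text. A small sketch using sklearn.tree.export_text (available since scikit-learn 0.21), fitting on the iris data from the cross-validation example above:

from sklearn.datasets import load_iris
from sklearn.tree import DecisionTreeClassifier, export_text

iris = load_iris()
clf = DecisionTreeClassifier().fit(iris.data, iris.target)
# print the learned tree as indented if/else rules
print(export_text(clf, feature_names=list(iris.feature_names)))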
3. Naive Bayes (naive_bayes.GaussianNB)
For the given data, naive Bayes first learns the joint input-output probability distribution under the assumption that the features are conditionally independent; then, for a given input x, it applies Bayes' theorem to this model to compute the posterior probability of each class.
sklearn implements Gaussian naive Bayes (GaussianNB), multinomial naive Bayes (MultinomialNB), and multivariate Bernoulli naive Bayes (BernoulliNB).
import numpy as np
from sklearn.naive_bayes import GaussianNB

X = np.array([[-1, -1], [-2, -1], [-3, -2], [1, 1], [2, 1], [3, 2]])
Y = np.array([1, 1, 1, 2, 2, 2])
clf = GaussianNB()
clf.fit(X, Y)
print(clf.predict([[-0.8, -1]]))
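Since naive Bayes is built around posterior probabilities, predict_proba exposes them directly. A small follow-on sketch reusing the clf fitted above:

# posterior P(class | x) for each class, in the order given by clf.classes_
print(clf.classes_)                     # [1 2]
print(clf.predict_proba([[-0.8, -1]]))  # roughly [[1., 0.]] for this point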
4. Evaluating Human Motion-State Data
import pandas as pd
import numpy as np
from sklearn.impute import SimpleImputer
from sklearn.metrics import classification_report
from sklearn.neighbors import KNeighborsClassifier  # K-nearest neighbors
from sklearn.tree import DecisionTreeClassifier     # decision tree
from sklearn.naive_bayes import GaussianNB          # naive Bayes
from sklearn.utils import shuffle

def load_datasets(feature_paths, label_paths):
    feature = np.ndarray(shape=(0, 41))
    label = np.ndarray(shape=(0, 1))
    for file in feature_paths:
        # '?' marks missing values in the raw files
        df = pd.read_csv(file, sep=',', na_values='?', header=None)
        # fill missing values with the column mean
        imp = SimpleImputer(missing_values=np.nan, strategy='mean')
        imp.fit(df)
        df = imp.transform(df)
        feature = np.concatenate((feature, df))
    for file in label_paths:
        df = pd.read_csv(file, header=None)
        label = np.concatenate((label, df))
    label = np.ravel(label)
    return feature, label

if __name__ == '__main__':
    # data paths
    featurePaths = ['A.feature', 'B.feature', 'C.feature', 'D.feature', 'E.feature']
    labelPaths = ['A.label', 'B.label', 'C.label', 'D.label', 'E.label']
    # load the data: A-D for training, E for testing
    x_train, y_train = load_datasets(featurePaths[:4], labelPaths[:4])
    x_test, y_test = load_datasets(featurePaths[4:], labelPaths[4:])
    # shuffle the training data (the original code used the now-removed
    # sklearn.cross_validation.train_test_split with test_size=0.0 for this)
    x_train, y_train = shuffle(x_train, y_train)

    print('Start training knn')
    knn = KNeighborsClassifier().fit(x_train, y_train)
    print('Training done')
    answer_knn = knn.predict(x_test)
    print('Prediction done')

    print('Start training DT')
    dt = DecisionTreeClassifier().fit(x_train, y_train)
    print('Training done')
    answer_dt = dt.predict(x_test)
    print('Prediction done')

    print('Start training Bayes')
    gnb = GaussianNB().fit(x_train, y_train)
    print('Training done')
    answer_gnb = gnb.predict(x_test)
    print('Prediction done')

    print('\n\nThe classification report for knn:')
    print(classification_report(y_test, answer_knn))
    print('\n\nThe classification report for DT:')
    print(classification_report(y_test, answer_dt))
    print('\n\nThe classification report for Bayes:')
    print(classification_report(y_test, answer_gnb))
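The A-E .feature/.label files are the course's motion-sensor dataset and are not included here. To smoke-test the script without them, one could generate hypothetical placeholder files of the right shape (41 comma-separated feature columns, one label column per row); the values below are random stand-ins, not real sensor data:

import numpy as np

rng = np.random.default_rng(0)
for name in ['A', 'B', 'C', 'D', 'E']:
    # 100 rows x 41 comma-separated feature columns of random values
    np.savetxt(name + '.feature', rng.random((100, 41)), delimiter=',')
    # one integer label (0-3) per row
    np.savetxt(name + '.label', rng.integers(0, 4, size=(100, 1)), fmt='%d')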
5. Support Vector Machine (SVM)
from sklearn import svm

X = [[0, 0], [1, 1], [1, 0]]    # training samples
y = [0, 1, 1]                   # training targets
clf = svm.SVC()                 # the classifier
clf.fit(X, y)                   # train the SVC model
result = clf.predict([[2, 2]])  # predict the target of a test sample
print(result)                   # predicted target
print(clf.support_vectors_)     # support vectors
print(clf.support_)             # indices of support vectors
print(clf.n_support_)           # number of support vectors for each class
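SVC defaults to the RBF kernel; the kernel and the regularization strength C can be changed at construction time, and decision_function exposes the signed distance from the separating boundary. A small sketch reusing X and y from the block above:

# a linear-kernel variant of the same toy problem
lin = svm.SVC(kernel='linear', C=1.0)
lin.fit(X, y)
# signed distance of a point from the decision boundary:
# the sign gives the predicted side, the magnitude a rough confidence
print(lin.decision_function([[2, 2]]))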
That's all.
:)