import numpy as np import operator import os def createDataset(): group=np.array([[1.0,1.1],[1.0,1.0],[0,0],[0,0.1]]) lables=[‘A‘,‘A‘,‘B‘,‘B‘] return group,lables def classify0(inX,dataSet,labels,k): dataSetSize=dataSet.shape[0] diffMat=np.tile(inX,(dataSetSize,1))-dataSet sqDiffMat=diffMat**2 sqDistances=sqDiffMat.sum(axis=1) distances=sqDistances**0.5 sortDistancesIndex=distances.argsort() classCount={}#TODO toOrder dectionary for i in range(k): voteIlabel=labels[sortDistancesIndex[i]] classCount[voteIlabel]=classCount.get(voteIlabel,0)+1 sortedClassCount=sorted(classCount.items(),key=operator.itemgetter(1),reverse=True) return sortedClassCount[0][0] def filematrix(filename): fr=open(filename) arrayOfLines=fr.readlines() numberOfLines=len(arrayOfLines) returnMat=np.zeros((numberOfLines,3)) classLableVector=[] index=0 for line in arrayOfLines: line=line.strip() listFromLine=line.split(‘\t‘) returnMat[index,:]=listFromLine[0:3] classLableVector.append(int(listFromLine[-1])) index+=1 return returnMat,classLableVector def autoNorm(dataSet): minVals=dataSet.min(0) maxVals=dataSet.max(0) rangs=maxVals-minVals dtRow=dataSet.shape[0] normDataset=dataSet-np.tile(minVals,(dtRow,1)) resultDataset=normDataset/np.tile(rangs,(dtRow,1)) return resultDataset,rangs,minVals def datingClassTest(): hoRatio=0.10 errorCount=0.0 datingMat,datingLabels=filematrix(‘dts.txt‘); normMat,normRang,normMin=autoNorm(datingMat) dataRows=normMat.shape[0] testDataRows=int(dataRows*hoRatio) for i in range(testDataRows): classfileterResult=classfy0(normMat[i,:],normMat[testDataRows:dataRows,:],datingLabels[testDataRows:dataRows],3) print("这次分类结果是: %d,这个真实的结果为:%d"%(classfileterResult,datingLabels[i])) if(classfileterResult!= datingLabels[i]):errorCount+=1.0 print("这次分类的总错误率为:%f"%(errorCount/float(testDataRows))) def classifyPerson(): resultList = [‘没有魅力‘, ‘魅力一般‘, ‘很有魅力‘] percentTats = float(input("每天所玩电子游戏的占比?")) ffMiles = float(input("每年的飞行里程数?")) iceCream = float(input("每周吃多少冰淇淋(升)?")) 
datingDataMat, datingLabels = filematrix(‘dts.txt‘) normMat, ranges, minVals = autoNorm(datingDataMat) inArr = np.array([ffMiles, percentTats, iceCream]) classifierResult = classify0((inArr - minVals)/ranges, normMat, datingLabels,3) print (‘这个人让人感觉: ‘, resultList[classifierResult - 1]) # 2:手写识别系统 #将一个32*32的二进制图像矩阵转换成1*1024的向量 def img2vector(filename): returnVect = np.zeros((1,1024)) fr = open(filename) for i in range(32): lineStr = fr.readline() for j in range(32): returnVect[0, 32*i+j] = int(lineStr[j]) return returnVect #手写识别系统测试代码 def handwritingClassTest(): hwLabels = [] trainingFileList = os.listdir(‘trainingDigits‘) #获取目录内容 m = len(trainingFileList) trainingMat = np.zeros((m, 1024)) for i in range(m): fileNameStr = trainingFileList[i] #分割得到标签 从文件名解析得到分类数据 fileStr = fileNameStr.split(‘.‘)[0] classStr = int(fileStr.split(‘_‘)[0]) hwLabels.append(classStr) #测试样例标签 trainingMat[i,:] = img2vector(‘trainingDigits/%s‘ % fileNameStr) testFileList = os.listdir(‘testDigits‘) errorCount = 0.0 mTest = len(testFileList) for i in range(mTest): fileNameStr = testFileList[i] fileStr = fileNameStr.split(‘.‘)[0] classStr = int(fileStr.split(‘_‘)[0]) vectorUnderTest = img2vector(‘testDigits/%s‘ % fileNameStr) classifierResult = classify0(vectorUnderTest, trainingMat, hwLabels, 3) print (‘the classifier came back with: %d, the real answer is: %d‘ % (classifierResult, classStr)) if(classifierResult != classStr): errorCount += 1.0 print ("\nthe total numbers of errors is : %d" % errorCount) print ("\nthe total error rate is: %f" % (errorCount/float(mTest)))
# Time: 2024-10-13 16:05:55