写了一个识别手写数字的KNN算法,代码如下。参考链接:http://blog.csdn.net/april_newnew/article/details/44176059
# -*- coding: utf-8 -*-
"""k-nearest-neighbour (KNN) classifier for handwritten digits stored as text.

Every sample is one text file whose name starts with its label followed by an
underscore (the label is taken as ``name.split('_')[0]``).  The file content
is assumed to be a bitmap written as rows of single-digit pixels, e.g. 32
lines of 32 ``0``/``1`` characters -- NOTE(review): confirm against the
actual data set.
"""
import os

import numpy as np
import pandas as pd


def readtxt(filename):
    """Read one text bitmap and return it as a flat list of floats.

    Each non-whitespace character of each line becomes one feature, so a
    32x32 bitmap yields 1024 values.  Converting a whole line with
    ``float()`` instead would squeeze an entire pixel row into a single huge,
    low-precision number, which destroys the distance metric used by KNN.

    Args:
        filename: Path of the UTF-8 text file to read.

    Returns:
        A list of floats, row after row, in file order.

    Raises:
        ValueError: If a line contains a character that is not a digit.
    """
    pixels = []
    # The context manager closes the file even if parsing fails.
    with open(filename, 'r', encoding='utf-8') as f:
        for line in f:
            # strip() drops the trailing newline; blank lines add nothing.
            pixels.extend(float(ch) for ch in line.strip())
    return pixels


def readdata(rootfile):
    """Load every sample file found below ``rootfile``.

    Args:
        rootfile: Directory that is walked recursively for sample files.

    Returns:
        A tuple ``(data, label)``: ``data`` is a DataFrame with one row per
        file, and ``label`` is the list of label strings in the same order.
    """
    data = []
    label = []
    for root, dirs, files in os.walk(rootfile):
        # Sort in place so the traversal (and therefore the row order) does
        # not depend on the order in which the file system lists entries.
        dirs.sort()
        for name in sorted(files):
            # os.path.join keeps the path valid on every platform.
            data.append(readtxt(os.path.join(root, name)))
            label.append(name.split('_')[0])
    return pd.DataFrame(data), label


def KNN(traindata, trainlabel, testdatai, K):
    """Classify one sample by majority vote among its K nearest neighbours.

    Args:
        traindata: 2-D array-like (e.g. DataFrame), one training sample per row.
        trainlabel: Sequence of labels, one per row of ``traindata``.
        testdatai: 1-D array-like with the features of the sample to classify.
        K: Number of nearest neighbours that take part in the vote.

    Returns:
        The label that occurs most often among the K training samples with
        the smallest Euclidean distance to ``testdatai``.

    Raises:
        ValueError: If ``traindata`` contains no samples.
    """
    train = np.asarray(traindata, dtype=float)
    if train.shape[0] == 0:
        raise ValueError('traindata is empty; cannot classify')
    sample = np.asarray(testdatai, dtype=float)
    # Broadcasting subtracts the sample from every training row at once.
    cha = np.sqrt(((train - sample) ** 2).sum(axis=1))
    result = pd.DataFrame({'label': list(trainlabel), 'cha': cha})
    # A stable sort makes the choice among equidistant neighbours reproducible.
    nearest = result.sort_values(by='cha', kind='mergesort')[:K]
    frequent = nearest.groupby('label').size()
    # idxmax() returns the label itself; argmax() returns an integer
    # position on current pandas, which would never match a string label.
    return frequent.idxmax()


def test(trainfile, testfile, K):
    """Classify every sample under ``testfile`` and measure the accuracy.

    Args:
        trainfile: Directory holding the training samples.
        testfile: Directory holding the samples to classify.
        K: Number of neighbours passed on to ``KNN``.

    Returns:
        A tuple ``(result, accuracy)``: the list of predicted labels and the
        fraction of predictions that equal the label taken from the file
        name.  The accuracy is 0.0 when there are no test samples.
    """
    traindata, trainlabel = readdata(trainfile)
    testdata, testlabel = readdata(testfile)
    result = [KNN(traindata, trainlabel, row, K) for row in testdata.values]
    if not result:
        # Nothing to score; avoid dividing by zero.
        return result, 0.0
    hits = sum(1 for predicted, actual in zip(result, testlabel)
               if predicted == actual)
    return result, hits / len(result)


# The name matches pytest's ``test*`` pattern; mark the function so that test
# collectors do not mistake this evaluation helper for a test case.
test.__test__ = False


trainfile = r'E:\trainingDigits'
testfile = r'E:\testDigits'
K = 3

if __name__ == '__main__':
    # Run the experiment only when executed as a script, not on import.
    result, accuary = test(trainfile, testfile, K)
注:训练数据集有2,210条记录,测试数据集有670条记录。准确率并不高,只有0.45,目前还不清楚原因;以后继续学习,争取优化代码。
时间: 2024-10-14 15:14:31