import numpy as np
import operator
import random
import os


def file2matrix(filePath):
    """Load a tab-separated data file into a feature matrix and a label list.

    Every non-blank line must contain at least three numeric columns; the
    last column of the line is taken as the label.

    Args:
        filePath: Path of the text file to read.

    Returns:
        A tuple ``(dataSet, labelList)``: an ``(n, 3)`` float64 array built
        from the first three columns of each line, and the list of the ``n``
        label strings taken from the last column.

    Raises:
        ValueError: If one of the first three columns is not a number.
    """
    # Read-only mode is sufficient (the former 'r+' failed on read-only
    # files), and the context manager guarantees the handle is closed.
    with open(filePath, 'r') as f:
        # Ignore blank lines such as a trailing empty line at end of file.
        lines = [line for line in f if line.strip()]
    dataSet = np.zeros((len(lines), 3), np.float64)
    labelList = []
    for i, line in enumerate(lines):
        row = line.split('\t')
        dataSet[i, :] = [float(value) for value in row[0:3]]
        labelList.append(row[-1].strip('\n'))
    return dataSet, labelList


def autoNormal(data):
    """Min-max scale every column of *data* into the range [0, 1].

    Args:
        data: 2-D numeric array with one sample per row.

    Returns:
        A tuple ``(normalDataSet, diff, dataMin)`` where ``normalDataSet`` is
        ``(data - dataMin) / diff``, ``diff`` is the per-column divisor
        (``max - min``, or 1 for a column whose values are all equal) and
        ``dataMin`` is the per-column minimum.
    """
    dataMin = data.min(0)
    dataMax = data.max(0)
    diff = dataMax - dataMin
    # A constant column has zero range; dividing by it would fill the column
    # with NaN and poison every distance computed later. Use 1 instead so the
    # column simply normalises to 0.
    diff = np.where(diff == 0, 1, diff)
    # Broadcasting applies the per-column vectors to every row.
    normalDataSet = (data - dataMin) / diff
    return normalDataSet, diff, dataMin


def dataClassTest(dataSet, labelList, ratio=0.1, k=20):
    """Estimate the kNN accuracy on randomly drawn rows of *dataSet*.

    ``int(ratio * n)`` rows are drawn at random (with replacement) and
    classified against the full ``dataSet``. The drawn rows are not removed
    from the reference data, so each one is also among its own neighbours.

    Args:
        dataSet: 2-D array of samples, one per row.
        labelList: Labels, ``labelList[i]`` belonging to ``dataSet[i]``.
        ratio: Fraction of the row count used as the number of draws.
        k: Number of neighbours passed to ``classify0``.

    Returns:
        The percentage (0-100) of drawn rows whose predicted label equals the
        stored label, or ``0.0`` when ``int(ratio * n)`` is zero.
    """
    sampleCount = dataSet.shape[0]
    testNumber = int(ratio * sampleCount)
    if testNumber <= 0:
        # Nothing to evaluate; avoid dividing by zero below.
        return 0.0
    correntCount = 0
    for _ in range(testNumber):
        # randrange excludes the upper bound. randint(0, n) includes n and
        # could therefore index one past the last row.
        idx = random.randrange(sampleCount)
        if classify0(dataSet[idx], dataSet, labelList, k) == labelList[idx]:
            correntCount += 1
    return correntCount * 100 / testNumber


def classifyPerson():
    """Prompt for three features, classify them and print the result.

    Reads ``datingTestSet.txt`` from the working directory, asks for the
    three feature values on standard input, prints the predicted label and
    then the accuracy estimate returned by ``dataClassTest``.
    """
    dataSet, labelSet = file2matrix('datingTestSet.txt')
    percentTats = float(input('Please input percentage of time spend playing video games?'))
    miles = float(input('Please input frequent flier miles earned per year?'))
    cream = float(input('Please input liters of ice cream consumed per year?'))
    dataSet, diff, dataMin = autoNormal(dataSet)
    # The query vector must use the same column order as the data file.
    # NOTE(review): ordered as (miles, game-time %, ice cream), which is the
    # layout of datingTestSet.txt in "Machine Learning in Action" -- confirm
    # against the data file actually used.
    intX = np.array([miles, percentTats, cream], np.float64)
    label = classify0((intX - dataMin) / diff, dataSet, labelSet, 20)
    print("You likely {0} the man!".format(label))
    correntPercent = dataClassTest(dataSet, labelSet)
    print("The estimate corrent percent is {0}%!".format(correntPercent))


def classify0(intX, dataSet, labelSet, k):
    """Classify *intX* by majority vote among its k nearest neighbours.

    Distances are Euclidean. When several labels share the highest vote
    count, the one encountered first (i.e. nearest) wins.

    Args:
        intX: Feature vector to classify; must broadcast against the rows of
            ``dataSet``.
        dataSet: 2-D array of reference samples, one per row.
        labelSet: Labels, ``labelSet[i]`` belonging to ``dataSet[i]``.
        k: Number of neighbours that vote. Values larger than the number of
            rows are reduced to the number of rows.

    Returns:
        The winning label.

    Raises:
        ValueError: If ``dataSet`` has no rows or ``k`` is smaller than 1.
    """
    dataSet = np.asarray(dataSet)
    # Never ask for more neighbours than there are samples.
    neighbours = min(k, dataSet.shape[0])
    if neighbours < 1:
        raise ValueError("classify0 needs at least one sample and k >= 1")
    # Broadcasting subtracts intX from every row.
    distances = ((dataSet - intX) ** 2).sum(axis=1) ** 0.5
    nearest = distances.argsort()[:neighbours]
    classCount = {}
    for idx in nearest:
        label = labelSet[idx]
        classCount[label] = classCount.get(label, 0) + 1
    # max() returns the first key with the highest count, and dicts keep
    # insertion order, so ties go to the label seen first.
    return max(classCount, key=classCount.get)


def img2vector(filename):
    """Read a 32x32 text image of digits into a 1x1024 row vector.

    Args:
        filename: Path of a text file whose first 32 lines each start with
            32 digit characters.

    Returns:
        A ``(1, 1024)`` array; element ``32 * i + j`` holds the integer value
        of character ``j`` on line ``i``.
    """
    vector = np.zeros((1, 1024))
    # The context manager closes the file even if parsing fails.
    with open(filename) as f:
        for i in range(32):
            fr = f.readline()
            for j in range(32):
                vector[0, 32 * i + j] = int(fr[j])
    return vector


def _digitLabel(filename):
    """Return the integer class encoded before the first '_' of *filename*."""
    # split, not strip: strip('_') only trims underscores at the ends, so
    # taking [0] of its result yields just the first character.
    return int(filename.split('_')[0])


def _loadDigits(directory):
    """Load every image file in *directory* as (labels, names, matrix)."""
    names = os.listdir(directory)
    labels = [_digitLabel(name) for name in names]
    matrix = np.zeros((len(names), 1024))
    for i, name in enumerate(names):
        matrix[i, :] = img2vector(os.path.join(directory, name))
    return labels, names, matrix


def handwritingClassTest(trainDir=None, testDir=None, k=3):
    """Run the handwritten-digit kNN test and print the accuracy.

    Every file in ``testDir`` is classified against all files in
    ``trainDir``; each misclassification is printed, followed by the overall
    accuracy. File names are expected to look like ``<label>_<index>.txt``.

    Args:
        trainDir: Directory with the training images. Defaults to
            ``machinelearninginaction/Ch02/digits/trainingDigits`` relative
            to the working directory.
        testDir: Directory with the test images. Defaults to
            ``machinelearninginaction/Ch02/digits/testDigits`` relative to
            the working directory.
        k: Number of neighbours passed to ``classify0``.

    Returns:
        The accuracy as a percentage (0-100); ``0.0`` for an empty test
        directory.
    """
    # os.path.join keeps the default locations valid on every platform; the
    # former backslash-separated literals only resolved on Windows.
    digitsDir = os.path.join('machinelearninginaction', 'Ch02', 'digits')
    if trainDir is None:
        trainDir = os.path.join(digitsDir, 'trainingDigits')
    if testDir is None:
        testDir = os.path.join(digitsDir, 'testDigits')

    trainLabelList, _, trainDataMatrix = _loadDigits(trainDir)

    testNames = os.listdir(testDir)
    corrent = 0
    for name in testNames:
        testLabel = _digitLabel(name)
        testIn = img2vector(os.path.join(testDir, name))
        testOut = classify0(testIn, trainDataMatrix, trainLabelList, k)
        if testOut == testLabel:
            corrent += 1
        else:
            print("Error:the classifier came back with:{0}, the real answer is:{1}。".format(testOut, testLabel))
    # Guard against an empty test directory instead of dividing by zero.
    accuracy = corrent * 100 / len(testNames) if testNames else 0.0
    print("the corrent percent is:%.2f %%。" % accuracy)
    return accuracy
if __name__ == '__main__':
    # Script entry point: run the interactive dating-preference prediction.
    classifyPerson()  # dating prediction
    # handwritingClassTest()  # handwritten-digit recognition

约会预测运行结果:

Please input percentage of time spend playing video games?100
Please input frequent flier miles earned per year?8
Please input liters of ice cream consumed per year?200
You likely didntLike the man!
The estimate corrent percent is 96.0%!
进程已结束,退出代码 0

手写识别运行结果:

Error:the classifier came back with:7, the real answer is:1。
Error:the classifier came back with:9, the real answer is:3。
Error:the classifier came back with:3, the real answer is:5。
Error:the classifier came back with:6, the real answer is:5。
Error:the classifier came back with:6, the real answer is:8。
Error:the classifier came back with:3, the real answer is:8。
Error:the classifier came back with:1, the real answer is:8。
Error:the classifier came back with:1, the real answer is:8。
Error:the classifier came back with:1, the real answer is:9。
Error:the classifier came back with:7, the real answer is:9。
the corrent percent is:98.94 %。
进程已结束,退出代码 0

测试数据:

说明:代码参考《机器学习实战》

最新文章

  1. Activity调用静态方法改变UI,使用Handler来改变UI显示
  2. Mybatis中SqlMapper配置的扩展与应用(3)
  3. Java排序算法——插入排序
  4. 真正理解linux的inode?
  5. hdu 4578 线段树 ****
  6. 放松时刻——C#分割字符串
  7. 用类求圆面积c++
  8. HDU5221 Occupation 树链剖分
  9. [ZOJ 1008]Gnome Tetravex (dfs搜索 + 小优化)
  10. .NET设计模式(3):抽象工厂模式(Abstract Factory)
  11. Android 自学之进度条ProgressBar
  12. UVA11922--Permutation Transformer (伸展树Splay)
  13. KMP求字符串最小循环节
  14. js要怎么接收后端传的excel文件流?
  15. tab切换的效果——仿照今日头条APP的切换效果
  16. planning深度剖析
  17. React child
  18. Java之反射举例
  19. Copycat - AppendRequest
  20. 预备作业2 :学习基础和C语言基础调查

热门文章

  1. 在浏览器输入URL发生了什么
  2. VB Open 函数详解 打开、关闭、读、写文件
  3. 使用QT创建系统托盘
  4. Git提交代码的正确姿势
  5. ubuntu 14.04 安装openjdk 8
  6. springboot2.0+mysql整合mybatis,发现查询出来的时间比数据库datetime值快了8小时
  7. php实现大视频上传
  8. HDU-6703 array
  9. OpenCV feature2d
  10. 5.React中组件通信问题