1. Perceptron Model
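
A minimal sketch of the classic perceptron learning rule (assuming a leading 1.0 bias column in x and +1/-1 labels; the function name and defaults here are illustrative):

import numpy as np

# Perceptron learning rule: whenever sample i is misclassified,
# update w <- w + alpha * y_i * x_i.
def train_perceptron(x, y, alpha=1.0, max_iter=100):
    m, n = x.shape
    w = np.zeros(n)
    for _ in range(max_iter):
        errors = 0
        for i in range(m):
            if y[i] * w.dot(x[i, :]) <= 0:   # wrong side of (or on) the hyperplane
                w = w + alpha * y[i] * x[i, :]
                errors += 1
        if errors == 0:                      # every sample classified correctly
            break
    return w

If the data is linearly separable the loop terminates on its own; otherwise max_iter caps the number of passes.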

2. Linear Regression

from numpy import *

def loadData(filename):
    x = []
    y = []
    f = open(filename)
    for line in f.readlines():
        lineData = line.strip().split(',')
        x.append([1.0, float(lineData[0])])
        y.append(float(lineData[1]))
    return x, y

# Hypothesis: theta and x are 1-D arrays, so dot() gives a scalar;
# for 2-D arrays dot() is matrix multiplication.
def h(theta, x):
    return theta.dot(x)

# Batch gradient descent: one pass over all samples per call
def batch_gradient_descent(alpha, theta, x, y):
    m, n = x.shape
    newtheta = array([0] * n, dtype=float)
    for j in range(n):
        count = 0.0
        for i in range(m):
            count += (h(theta, x[i, :]) - y[i]) * x[i, j]
        newtheta[j] = theta[j] - count * alpha / m
    return newtheta

# Normal equation: theta = (X^T X)^(-1) X^T y
def normal_equation(x, y):
    return linalg.inv(transpose(x).dot(x)).dot(transpose(x)).dot(y)

# Cost function J(theta)
def cost_function(theta, x, y):
    m = x.shape[0]
    return (x.dot(theta) - y).dot(x.dot(theta) - y) / (2 * m)

def run():
    x, y = loadData('ex1data1.txt')
    x = array(x)
    y = array(y)          # column vector of targets
    m, n = x.shape
    theta = array([0] * n, dtype=float)
    costs = []
    for iters in range(1000):
        costs.append(cost_function(theta, x, y))
        theta = batch_gradient_descent(0.01, theta, x, y)
    print("batch gradient descent:")
    print("theta:", theta)
    print("cost:", costs)
    print("normal equation:")
    theta = normal_equation(x, y)
    print("theta:", theta)

if __name__ == "__main__":
    run()
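
For reference, the formulas the code above implements: the hypothesis and squared-error cost, the batch gradient-descent update, and the closed-form normal equation.

h_\theta(x) = \theta^T x, \qquad
J(\theta) = \frac{1}{2m}\sum_{i=1}^{m}\left(h_\theta(x^{(i)}) - y^{(i)}\right)^2

\theta_j := \theta_j - \frac{\alpha}{m}\sum_{i=1}^{m}\left(h_\theta(x^{(i)}) - y^{(i)}\right)x_j^{(i)}, \qquad
\theta = (X^T X)^{-1} X^T y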

3. Logistic Regression

from numpy import *

def sigmoid(x):
    return 1.0 / (1 + exp(-x))

def trainLogRegres(x, y, opts):
    m, n = x.shape
    alpha = opts['alpha']
    maxIter = opts['maxIter']
    weight = ones((n, 1))
    for k in range(maxIter):
        if opts['optimizeType'] == 'batchGraDescent':
            weight = weight - alpha * x.T * (sigmoid(x * weight) - y)
        elif opts['optimizeType'] == 'stocGraDescent':
            for i in range(m):
                weight = weight - alpha * x[i, :].T * (sigmoid(x[i, :] * weight) - y[i, 0])
        else:
            raise NameError('Not support optimize method type!')
    return weight

def testLogRegres(weight, x, y):
    m, n = x.shape
    trueNum = 0
    for i in range(m):
        predict = sigmoid(x[i, :] * weight)[0, 0] > 0.5
        if predict == bool(y[i, 0]):
            trueNum += 1
    accuracy = float(trueNum) / m
    return accuracy

# Each row of x is one sample; y is a column vector
def loadData():
    x = []
    y = []
    f = open("testSet.txt")
    for line in f.readlines():
        lineArr = line.strip().split()
        x.append([1.0, float(lineArr[0]), float(lineArr[1])])
        y.append(float(lineArr[2]))
    return mat(x), mat(y).T

if __name__ == '__main__':
    x, y = loadData()
    opts = {'alpha': 0.01, 'maxIter': 50, 'optimizeType': 'stocGraDescent'}
    weight = trainLogRegres(x, y, opts)
    accuracy = testLogRegres(weight, x, y)
    print("accuracy:", accuracy)

4. SVM
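
A minimal sketch of a linear soft-margin SVM, trained by sub-gradient descent on the primal hinge-loss objective rather than SMO; the function names and the defaults (lam, alpha, max_iter) are illustrative, and labels are assumed to be +1/-1:

import numpy as np

# Linear soft-margin SVM via sub-gradient descent on the primal objective
#   (lam/2)*||w||^2 + (1/m) * sum_i max(0, 1 - y_i * (w.x_i + b))
def train_linear_svm(x, y, lam=0.01, alpha=0.01, max_iter=1000):
    m, n = x.shape
    w = np.zeros(n)
    b = 0.0
    for _ in range(max_iter):
        margins = y * (x.dot(w) + b)
        active = margins < 1                      # inside the margin or misclassified
        grad_w = lam * w - x[active].T.dot(y[active]) / m
        grad_b = -np.sum(y[active]) / m
        w -= alpha * grad_w
        b -= alpha * grad_b
    return w, b

def predict_svm(w, b, x):
    return np.sign(x.dot(w) + b)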

5. K-means
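
A minimal sketch of standard k-means (Lloyd's algorithm) in numpy; the random initialization, function name, and defaults are illustrative:

import numpy as np

# K-means: alternate between assigning each sample to its nearest centroid
# and recomputing each centroid as the mean of its cluster.
def kmeans(x, k, max_iter=100, seed=0):
    rng = np.random.default_rng(seed)
    m, n = x.shape
    centroids = x[rng.choice(m, k, replace=False)]   # init: k random samples
    for _ in range(max_iter):
        # distance from every sample to every centroid, shape (m, k)
        dists = np.linalg.norm(x[:, None, :] - centroids[None, :, :], axis=2)
        labels = dists.argmin(axis=1)
        new_centroids = np.array([
            x[labels == j].mean(axis=0) if np.any(labels == j) else centroids[j]
            for j in range(k)
        ])
        if np.allclose(new_centroids, centroids):    # assignments stopped changing
            break
        centroids = new_centroids
    return centroids, labels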

6. LSA (Latent Semantic Analysis)

https://en.wikipedia.org/wiki/Latent_semantic_analysis
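
Following the reference above, a minimal sketch of LSA as a truncated SVD of a term-document matrix A; the helper name and the choice of returned representations are illustrative:

import numpy as np

# LSA sketch: A ~= U_k * S_k * Vt_k keeps the top-k singular values,
# giving k-dimensional "concept" vectors for terms and documents.
def lsa(A, k):
    U, s, Vt = np.linalg.svd(A, full_matrices=False)
    U_k, s_k, Vt_k = U[:, :k], s[:k], Vt[:k, :]
    term_vecs = U_k * s_k                 # one row per term
    doc_vecs = (s_k[:, None] * Vt_k).T    # one row per document
    return term_vecs, doc_vecs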
