常见machine learning模型实现

一、感知机模型

二、线性回归(Linear Regression)

from numpy import *

def loadData(filename):
    """Read a two-column, comma-separated data file for linear regression.

    Each non-blank line supplies one sample: the first field is the
    feature value and the second field is the target value.

    Args:
        filename: path of the text file to read.

    Returns:
        A tuple ``(x, y)`` where ``x`` is a list of ``[1.0, feature]`` rows
        (the leading 1.0 is the intercept term) and ``y`` is the list of
        target values as floats.

    Raises:
        ValueError: if a field cannot be converted to float.
        IndexError: if a non-blank line has fewer than two fields.
    """
    x = []
    y = []
    # The context manager guarantees the file is closed even if parsing fails.
    with open(filename) as f:
        for line in f:
            stripped = line.strip()
            if not stripped:
                # Tolerate blank lines (e.g. a trailing newline at end of file).
                continue
            fields = stripped.split(',')
            x.append([1.0, float(fields[0])])
            y.append(float(fields[1]))
    return x, y

def h(theta,x):
    """Hypothesis (prediction) function: the dot product of theta and x.

    When theta and x are both 1-D arrays the result is a scalar; for 2-D
    arrays ``dot`` performs matrix multiplication.
    """
    prediction = theta.dot(x)
    return prediction

# Batch gradient descent
def batch_gradient_descent(alpha,theta,x,y):
    """Perform one batch gradient-descent step for least-squares regression.

    Args:
        alpha: learning rate (step size).
        theta: 1-D array of the current parameters, one per column of x.
        x: 2-D array with one sample per row.
        y: 1-D array of targets, one per row of x.

    Returns:
        A new array ``theta - alpha * gradient`` where ``gradient`` is the
        mean over all samples of ``(x[i].dot(theta) - y[i]) * x[i]``.
        The input ``theta`` is not modified.
    """
    m = x.shape[0]
    # Prediction error of every sample under the current parameters.
    residual = x.dot(theta) - y
    # float(m) keeps true division even for integer inputs on Python 2.
    gradient = x.T.dot(residual) / float(m)
    # The step must start from the current theta; starting from zeros (as the
    # previous implementation did) discards the parameters learned so far.
    return theta - alpha * gradient

# Normal equation
def normal_equation(x,y):
    """Solve least squares in closed form: inv(x^T x) . x^T . y."""
    x_t = transpose(x)
    gram_inverse = linalg.inv(x_t.dot(x))
    return gram_inverse.dot(x_t).dot(y)

# Loss function
def cost_function(theta,x,y):
    """Return the squared-error cost: sum of squared residuals / (2 * m).

    ``m`` is the number of rows of x; the residual is ``x.dot(theta) - y``.
    """
    sample_count = x.shape[0]
    residual = x.dot(theta) - y
    squared_error = residual.dot(residual)
    return squared_error / (2 * sample_count)

def run():
    """Fit linear regression on 'ex1data1.txt' two ways and print the results.

    First runs 1000 iterations of batch gradient descent with a learning
    rate of 0.01, recording the cost before each update, then solves the
    same problem with the normal equation. Output goes to stdout.
    """
    x,y = loadData('ex1data1.txt')
    x = array(x)
    y = array(y)  # 1-D array of targets, one per row of x
    m,n = x.shape
    theta = array([0] * n,dtype = float)
    costs = []
    for iters in range(1000):
        costs.append(cost_function(theta,x,y))
        theta = batch_gradient_descent(0.01,theta,x,y)
    # Single-argument print(...) calls emit the same text on Python 2 and 3;
    # the bare `print x, y` statement form is a syntax error on Python 3.
    print("batch gradient descent:\n")
    print("theta: %s" % (theta,))
    print("cost:\n%s" % (costs,))
    print("normal equation:\n")
    theta = normal_equation(x,y)
    print("theta: %s" % (theta,))

# Run the linear-regression demo only when this file is executed as a script.
if __name__ == "__main__":

    run()

三、Logistic Regression

def sigmoid(x):
    """Logistic function 1 / (1 + e^(-x)), applied element-wise via ``exp``."""
    denominator = 1 + exp(-x)
    return 1.0 / denominator

def trainLogRegres(x,y,opts):
    """Train logistic-regression weights by gradient descent.

    Args:
        x: samples, one per row. The ``*`` products below are written as
           matrix multiplications, so x and y are presumably numpy matrices
           (as returned by this script's loadData) -- confirm for other callers.
        y: labels; indexed as ``y[i,0]``, i.e. one label per row.
        opts: dict with keys 'alpha' (learning rate), 'maxIter' (number of
              passes) and 'optimizeType' ('batchGraDescent' or
              'stocGraDescent').

    Returns:
        The weights after ``maxIter`` passes, starting from an (n, 1) array
        of ones, where n is the number of columns of x.

    Raises:
        NameError: if 'optimizeType' is not one of the two supported values
            (only checked once a pass actually runs).
    """
    sample_count, feature_count = x.shape
    step = opts["alpha"]
    rounds = opts['maxIter']
    weight = ones((feature_count, 1))
    for _ in range(rounds):
        method = opts['optimizeType']
        if method == 'batchGraDescent':
            # One update per pass, using every sample at once.
            residual = sigmoid(x * weight) - y
            weight = weight - step * x.T * residual
        elif method == 'stocGraDescent':
            # One update per sample, visiting samples in row order.
            for row in range(sample_count):
                sample = x[row, :]
                residual = sigmoid(sample * weight) - y[row, 0]
                weight = weight - step * sample.T * residual
        else:
            raise NameError('Not support optimize method type!')
    return weight

def testLogRegres(weight,x,y):
    """Return the fraction of rows of x that the weights classify correctly.

    A row is predicted positive when ``sigmoid(x[i,:] * weight)[0,0]``
    exceeds 0.5; the prediction is compared with ``bool(y[i,0])``.
    """
    sample_count, _ = x.shape
    hits = 0
    for row in range(sample_count):
        probability = sigmoid(x[row, :] * weight)[0, 0]
        expected = bool(y[row, 0])
        if (probability > 0.5) == expected:
            hits += 1
    return float(hits) / sample_count

def loadData(filename="testSet.txt"):
    """Read a whitespace-separated data file for logistic regression.

    Each non-blank line supplies one sample: two feature values followed
    by the label.

    Args:
        filename: path of the text file to read; defaults to "testSet.txt".

    Returns:
        A tuple ``(x, y)`` of numpy matrices. Each row of ``x`` is
        ``[1.0, feature1, feature2]`` (the leading 1.0 is the intercept
        term); ``y`` is a column vector holding the labels as floats.

    Raises:
        ValueError: if a field cannot be converted to float.
        IndexError: if a non-blank line has fewer than three fields.
    """
    x = []
    y = []
    # The context manager guarantees the file is closed even if parsing fails.
    with open(filename) as f:
        for line in f:
            fields = line.split()
            if not fields:
                # Tolerate blank lines (e.g. a trailing newline at end of file).
                continue
            x.append([1.0, float(fields[0]), float(fields[1])])
            y.append(float(fields[2]))
    # asmatrix is used instead of the `mat` alias, which NumPy 2.0 removed.
    return asmatrix(x), asmatrix(y).T

if __name__ == '__main__':
    # Train on the bundled data set with stochastic gradient descent and
    # report the accuracy measured on that same training data.
    x,y = loadData()
    opts = {'alpha': 0.01, 'maxIter': 50, 'optimizeType': 'stocGraDescent'}
    weight = trainLogRegres(x,y,opts)
    accuracy = testLogRegres(weight,x,y)
    # Single-argument print(...) emits the same text on Python 2 and 3; the
    # bare `print x, y` statement form is a syntax error on Python 3.
    print("accuracy: %s" % accuracy)

四、SVM

五、kmeans

https://en.wikipedia.org/wiki/Latent_semantic_analysis

巴特西

常见machine learning模型实现

最新文章

热门文章