@hanxiaoyang
2016-05-07T08:58:52.000000Z
Linear Regression and Logistic Regression

Linear regression code
from __future__ import division
import numpy as np
import matplotlib.pyplot as plt

# Read the raw data into a matrix, using the numpy scientific-computing package
data = np.genfromtxt('ex1data1.txt', delimiter=',')
x = data[:, 0]
y = data[:, 1]
m = y.size  # note: size is an attribute, not a method

# Design matrix: a column of ones (bias term) alongside the input feature
X = np.ones((m, 2))
X[:, 1] = x


# Compute and return the value of the cost function
def cost(X, y, theta=np.zeros((2, 1))):
    """Compute and return the cost.
    theta: linear regression parameters
    X: inputs (design matrix)
    y: targets
    """
    m = len(y)
    J = 1 / (2 * m) * sum((X.dot(theta).flatten() - y) ** 2)
    return J


# Minimize the cost function with gradient descent
def gradientDesc(X, y, theta=None, alpha=.01, iterations=1500):
    """Gradient descent for univariate linear regression."""
    if theta is None:
        theta = np.zeros((2, 1))  # avoid a mutable default argument shared across calls
    m = y.size
    J = []
    for _ in range(iterations):
        # Update both parameters simultaneously
        a = theta[0][0] - alpha * (1 / m) * sum((X.dot(theta).flatten() - y) * X[:, 0])
        b = theta[1][0] - alpha * (1 / m) * sum((X.dot(theta).flatten() - y) * X[:, 1])
        theta[0][0], theta[1][0] = a, b
        print(theta[0][0])
        print(theta[1][0])
        J.append(cost(X, y, theta))
        print('Cost: ' + str(J[-1]))
    return theta


# Scatter plot of the data, with an option to save the figure
def scatterPlot(x, y, yp=None, savePng=False):
    plt.xlabel('Population of City in 10,000s')
    plt.ylabel('Profit in $10,000s')
    plt.scatter(x, y, marker='x')
    if yp is not None:
        plt.plot(x, yp)
    if not savePng:
        plt.show()
    else:
        name = input('Name Figure File: ')
        plt.savefig(name + '.png')
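Each gradient-descent iteration applies the update theta_j := theta_j - alpha * (1/m) * sum_i (h(x_i) - y_i) * x_ij, which is exactly what the two assignments inside the loop compute. As a quick sanity check, the pieces above can be chained together like this (a minimal usage sketch; the names X, x, y come from the block above):

theta = gradientDesc(X, y)   # runs 1500 iterations with alpha = 0.01
yp = X.dot(theta).flatten()  # fitted values at the training inputs
scatterPlot(x, y, yp=yp)     # data plus the fitted regression line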
Data

The contents of ex1data1.txt (population, profit pairs, one pair per line):
6.1101,17.592
5.5277,9.1302
8.5186,13.662
7.0032,11.854
5.8598,6.8233
8.3829,11.886
7.4764,4.3483
8.5781,12
6.4862,6.5987
5.0546,3.8166
5.7107,3.2522
14.164,15.505
5.734,3.1551
8.4084,7.2258
5.6407,0.71618
5.3794,3.5129
6.3654,5.3048
5.1301,0.56077
6.4296,3.6518
7.0708,5.3893
6.1891,3.1386
20.27,21.767
5.4901,4.263
6.3261,5.1875
5.5649,3.0825
18.945,22.638
12.828,13.501
10.957,7.0467
13.176,14.692
22.203,24.147
5.2524,-1.22
6.5894,5.9966
9.2482,12.134
5.8918,1.8495
8.2111,6.5426
7.9334,4.5623
8.0959,4.1164
5.6063,3.3928
12.836,10.117
6.3534,5.4974
5.4069,0.55657
6.8825,3.9115
11.708,5.3854
5.7737,2.4406
7.8247,6.7318
7.0931,1.0463
5.0702,5.1337
5.8014,1.844
11.7,8.0043
5.5416,1.0179
7.5402,6.7504
5.3077,1.8396
7.4239,4.2885
7.6031,4.9981
6.3328,1.4233
6.3589,-1.4211
6.2742,2.4756
5.6397,4.6042
9.3102,3.9624
9.4536,5.4141
8.8254,5.1694
5.1793,-0.74279
21.279,17.929
14.908,12.054
18.959,17.054
7.2182,4.8852
8.2951,5.7442
10.236,7.7754
5.4994,1.0173
20.341,20.992
10.136,6.6799
7.3345,4.0259
6.0062,1.2784
7.2259,3.3411
5.0269,-2.6807
6.5479,0.29678
7.5386,3.8845
5.0365,5.7014
10.274,6.7526
5.1077,2.0576
5.7292,0.47953
5.1884,0.20421
6.3557,0.67861
9.7687,7.5435
6.5159,5.3436
8.5172,4.2415
9.1802,6.7981
6.002,0.92695
5.5204,0.152
5.0594,2.8214
5.7077,1.8451
7.6366,4.2959
5.8707,7.2029
5.3054,1.9869
8.2934,0.14454
13.394,9.0551
5.4369,0.61705
Logistic Regression
The code below follows https://github.com/tarlen5/coursera_ml/tree/master/unit6
from numpy import loadtxt, where
from pylab import scatter, show, legend, xlabel, ylabel

# Load the data
data = loadtxt('/home/Julyedu/data/data1.txt', delimiter=',')
X = data[:, 0:2]
y = data[:, 2]

# Plot the two classes with different markers
pos = where(y == 1)
neg = where(y == 0)
scatter(X[pos, 0], X[pos, 1], marker='o', c='b')
scatter(X[neg, 0], X[neg, 1], marker='x', c='r')
xlabel('Feature1/Exam 1 score')
ylabel('Feature2/Exam 2 score')
legend(['Pass', 'Fail'])  # the y == 1 (pass) points are plotted first
show()
from numpy import e, log, reshape, transpose, zeros


def sigmoid(X):
    '''The sigmoid function'''
    den = 1.0 + e ** (-1.0 * X)
    gz = 1.0 / den
    return gz


def compute_cost(theta, X, y):
    '''Compute the value of the cost function (and the gradient)'''
    m = X.shape[0]                           # number of training examples
    theta = reshape(theta, (len(theta), 1))  # parameters theta as a column vector
    y = reshape(y, (m, 1))                   # targets as a column vector
    J = (1. / m) * (-transpose(y).dot(log(sigmoid(X.dot(theta))))
                    - transpose(1 - y).dot(log(1 - sigmoid(X.dot(theta)))))
    grad = transpose((1. / m) * transpose(sigmoid(X.dot(theta)) - y).dot(X))
    return J[0][0], grad


def compute_grad(theta, X, y):
    '''Compute the gradient'''
    m = X.shape[0]  # was an undefined global in the original
    theta.shape = (1, 3)
    grad = zeros(3)
    h = sigmoid(X.dot(theta.T)).flatten()
    delta = h - y.flatten()  # flatten both so the subtraction cannot broadcast to (m, m)
    for i in range(grad.size):
        # the original multiplied by -1 here, which flips the sign of the true gradient
        grad[i] = (1.0 / m) * delta.dot(X[:, i])
    theta.shape = (3,)
    return grad
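compute_cost implements the logistic regression cost J(theta) = -(1/m) * sum_i [ y_i * log(h(x_i)) + (1 - y_i) * log(1 - h(x_i)) ] with h(x) = sigmoid(theta^T x). A quick sanity check on the X, y loaded above (a minimal sketch: these functions expect a bias column, which the original snippet never shows being added, so the hstack line here is an assumption):

from numpy import ones, hstack, zeros

X1 = hstack((ones((X.shape[0], 1)), X))       # prepend the bias column -> shape (m, 3)
initial_theta = zeros(X1.shape[1])
J, grad = compute_cost(initial_theta, X1, y)
print(J)                                      # at theta = 0, J should be about log(2) ~ 0.693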
from numpy import loadtxt, where
from pylab import scatter, show, legend, xlabel, ylabel

# Load the data
data = loadtxt('/home/Julyedu/data/data2.txt', delimiter=',')
X = data[:, 0:2]
y = data[:, 2]
pos = where(y == 1)
neg = where(y == 0)
scatter(X[pos, 0], X[pos, 1], marker='o', c='b')
scatter(X[neg, 0], X[neg, 1], marker='x', c='r')
xlabel('Microchip Test 1')
ylabel('Microchip Test 2')  # the original repeated 'Microchip Test 1' here
legend(['1', '0'])          # the y == 1 points are plotted first
show()

from numpy import ones, append


def map_feature(x1, x2):
    '''
    Feature mapping to polynomial combination features:
    X1, X2, X1 ** 2, X2 ** 2, X1*X2, X1*X2 ** 2, etc...
    '''
    x1.shape = (x1.size, 1)
    x2.shape = (x2.size, 1)
    degree = 6
    mapped_fea = ones(shape=(x1[:, 0].size, 1))  # start with the constant (bias) column
    for i in range(1, degree + 1):
        for j in range(i + 1):
            r = (x1 ** (i - j)) * (x2 ** j)
            mapped_fea = append(mapped_fea, r, axis=1)
    return mapped_fea

mapped_fea = map_feature(X[:, 0], X[:, 1])
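For degree = 6 this mapping returns 1 + 2 + ... + 7 = 28 columns: one per monomial x1**(i-j) * x2**j with 0 <= j <= i <= 6, including the constant bias column. A one-line check:

print(mapped_fea.shape[1])  # 28 polynomial features per sample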

from numpy import zeros, log
from scipy.optimize import fmin_bfgs


def cost_function_reg(theta, X, y, l):
    '''Compute the regularized cost and gradient'''
    m = X.shape[0]
    theta = theta.reshape(-1, 1)  # accept both flat and column-vector theta
    h = sigmoid(X.dot(theta))
    thetaR = theta[1:, 0]         # theta[0] (the bias) is not regularized

    J = (1.0 / m) * ((-y.T.dot(log(h))) - ((1 - y.T).dot(log(1.0 - h)))) \
        + (l / (2.0 * m)) * (thetaR.T.dot(thetaR))

    delta = h - y
    sum_delta = delta.T.dot(X[:, 0])  # gradient of the bias term (the original indexed X[:, 1] here)
    grad1 = (1.0 / m) * sum_delta

    XR = X[:, 1:X.shape[1]]
    sum_delta = delta.T.dot(XR)
    grad = (1.0 / m) * (sum_delta + l * thetaR)

    out = zeros(shape=(grad.shape[0], grad.shape[1] + 1))
    out[:, 0] = grad1
    out[:, 1:] = grad
    return J.flatten()[0], out.T.flatten()

m, n = X.shape
y.shape = (m, 1)
it = map_feature(X[:, 0], X[:, 1])

# Initialize the parameters
initial_theta = zeros(it.shape[1])
# Set the regularization coefficient to 1
l = 1
cost, grad = cost_function_reg(initial_theta, it, y, l)

# fmin_bfgs needs the cost and the gradient as separate callables;
# the original passed the (cost, grad) tuple and a maxfun keyword that fmin_bfgs does not accept
def decorated_cost(theta):
    return cost_function_reg(theta, it, y, l)[0]

def decorated_grad(theta):
    return cost_function_reg(theta, it, y, l)[1]

print(fmin_bfgs(decorated_cost, initial_theta, fprime=decorated_grad, maxiter=500))
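Once fmin_bfgs returns the fitted parameters, predictions come from thresholding the sigmoid at 0.5. A minimal sketch; theta_opt is simply a name given here to the optimizer's return value:

theta_opt = fmin_bfgs(decorated_cost, initial_theta, fprime=decorated_grad, maxiter=500)
p = (sigmoid(it.dot(theta_opt.reshape(-1, 1))) >= 0.5).astype(int)  # predicted labels, shape (m, 1)
print('Train accuracy: %.2f%%' % ((p == y).mean() * 100))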
