@hanxiaoyang
2016-05-07T16:58:52.000000Z
Linear Regression and Logistic Regression
Linear Regression Code
from __future__ import division
import numpy as np
import matplotlib.pyplot as plt

# Read the raw data into a matrix (using the numpy scientific computing package)
data = np.genfromtxt('ex1data1.txt', delimiter=',')
raw_x = data[:, 0]
y = data[:, 1]
m = y.size
# Build the design matrix: a leading column of ones for the intercept, then the feature
x = np.ones((m, 2))
x[:, 1] = raw_x
# Compute and return the value of the cost function
def cost(x, y, theta=np.zeros((2, 1))):
    """
    Compute and return the cost function value.
    theta: linear regression parameters
    x: input (design matrix)
    y: target
    """
    m = y.size
    J = 1 / (2 * m) * np.sum((x.dot(theta).flatten() - y) ** 2)
    return J
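As a quick sanity check (an addition, not part of the original exercise code): with the default all-zero parameters the prediction is zero everywhere, so the cost reduces to half the mean of the squared targets, which should come out to roughly 32.07 on ex1data1.txt according to the original Coursera exercise.

print(cost(x, y))             # ~32.07 with theta = 0
print(0.5 * np.mean(y ** 2))  # the same quantity computed directly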
# Minimize the cost function with gradient descent
def gradientDesc(x, y, theta=np.zeros((2, 1)), alpha=.01, iterations=1500):
    """
    Gradient descent for univariate linear regression.
    """
    theta = theta.copy()  # avoid mutating the caller's (or the shared default) array
    m = y.size
    J = []
    for _ in range(iterations):
        # Update both parameters simultaneously along the negative gradient
        a = theta[0][0] - alpha * (1 / m) * np.sum((x.dot(theta).flatten() - y) * x[:, 0])
        b = theta[1][0] - alpha * (1 / m) * np.sum((x.dot(theta).flatten() - y) * x[:, 1])
        theta[0][0], theta[1][0] = a, b
        J.append(cost(x, y, theta))
        print('Cost: ' + str(J[-1]))
    return theta
# Scatterplot of the data, with an option to save the figure
def scatterPlot(x, y, yp=None, savePng=False):
    plt.xlabel('Population of City in 10,000s')
    plt.ylabel('Profit in $10,000s')
    plt.scatter(x, y, marker='x')
    if yp is not None:
        plt.plot(x, yp)
    if not savePng:
        plt.show()
    else:
        name = input('Name Figure File: ')
        plt.savefig(name + '.png')
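Putting the pieces together, a minimal driver (a sketch assuming the variable names defined above) fits the line and overlays it on the raw data; on this dataset gradient descent should land near theta = (-3.63, 1.17), the values reported in the original Coursera exercise.

theta = gradientDesc(x, y)                        # fit by gradient descent
scatterPlot(raw_x, y, yp=x.dot(theta).flatten())  # overlay the fitted line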
Data (ex1data1.txt)
6.1101,17.592
5.5277,9.1302
8.5186,13.662
7.0032,11.854
5.8598,6.8233
8.3829,11.886
7.4764,4.3483
8.5781,12
6.4862,6.5987
5.0546,3.8166
5.7107,3.2522
14.164,15.505
5.734,3.1551
8.4084,7.2258
5.6407,0.71618
5.3794,3.5129
6.3654,5.3048
5.1301,0.56077
6.4296,3.6518
7.0708,5.3893
6.1891,3.1386
20.27,21.767
5.4901,4.263
6.3261,5.1875
5.5649,3.0825
18.945,22.638
12.828,13.501
10.957,7.0467
13.176,14.692
22.203,24.147
5.2524,-1.22
6.5894,5.9966
9.2482,12.134
5.8918,1.8495
8.2111,6.5426
7.9334,4.5623
8.0959,4.1164
5.6063,3.3928
12.836,10.117
6.3534,5.4974
5.4069,0.55657
6.8825,3.9115
11.708,5.3854
5.7737,2.4406
7.8247,6.7318
7.0931,1.0463
5.0702,5.1337
5.8014,1.844
11.7,8.0043
5.5416,1.0179
7.5402,6.7504
5.3077,1.8396
7.4239,4.2885
7.6031,4.9981
6.3328,1.4233
6.3589,-1.4211
6.2742,2.4756
5.6397,4.6042
9.3102,3.9624
9.4536,5.4141
8.8254,5.1694
5.1793,-0.74279
21.279,17.929
14.908,12.054
18.959,17.054
7.2182,4.8852
8.2951,5.7442
10.236,7.7754
5.4994,1.0173
20.341,20.992
10.136,6.6799
7.3345,4.0259
6.0062,1.2784
7.2259,3.3411
5.0269,-2.6807
6.5479,0.29678
7.5386,3.8845
5.0365,5.7014
10.274,6.7526
5.1077,2.0576
5.7292,0.47953
5.1884,0.20421
6.3557,0.67861
9.7687,7.5435
6.5159,5.3436
8.5172,4.2415
9.1802,6.7981
6.002,0.92695
5.5204,0.152
5.0594,2.8214
5.7077,1.8451
7.6366,4.2959
5.8707,7.2029
5.3054,1.9869
8.2934,0.14454
13.394,9.0551
5.4369,0.61705
Logistic Regression
https://github.com/tarlen5/coursera_ml/tree/master/unit6
import numpy as np
from numpy import loadtxt, where
from pylab import scatter, show, legend, xlabel, ylabel

# Read the data
data = loadtxt('/home/Julyedu/data/data1.txt', delimiter=',')
X = data[:, 0:2]
y = data[:, 2]
pos = where(y == 1)
neg = where(y == 0)
scatter(X[pos, 0], X[pos, 1], marker='o', c='b')
scatter(X[neg, 0], X[neg, 1], marker='x', c='r')
xlabel('Feature1/Exam 1 score')
ylabel('Feature2/Exam 2 score')
legend(['Pass', 'Fail'])  # positives (y == 1) are plotted first
show()
def sigmoid(X):
    '''The sigmoid function, 1 / (1 + exp(-X))'''
    den = 1.0 + np.exp(-1.0 * X)
    gz = 1.0 / den
    return gz
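A quick check of the implementation: the sigmoid is 0.5 at zero and saturates toward 0 and 1 at the extremes, and it accepts scalars and numpy arrays alike.

print(sigmoid(0))                             # 0.5
print(sigmoid(np.array([-10.0, 0.0, 10.0])))  # ~[4.54e-05, 0.5, 0.99995]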
def compute_cost(theta, X, y):
    '''Compute the cost function value and its gradient'''
    m = X.shape[0]                              # number of training examples
    theta = np.reshape(theta, (len(theta), 1))  # parameter vector, as a column
    y = np.reshape(y, (m, 1))                   # target vector, as a column
    h = sigmoid(X.dot(theta))
    J = (1. / m) * (-y.T.dot(np.log(h)) - (1 - y).T.dot(np.log(1 - h)))
    grad = (1. / m) * X.T.dot(h - y)
    return J[0][0], grad
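With theta = 0 every prediction is sigmoid(0) = 0.5, so the initial cost must be exactly -log(0.5) ≈ 0.693 regardless of the data, which makes a convenient first test (X1 below is a hypothetical name for the design matrix with an intercept column prepended):

X1 = np.hstack([np.ones((X.shape[0], 1)), X])  # prepend the intercept column
J0, grad0 = compute_cost(np.zeros(3), X1, y)
print(J0)  # ~0.6931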
def compute_grad(theta, X, y):
    '''Compute the gradient of the cost function'''
    m = X.shape[0]
    theta.shape = (1, 3)
    grad = np.zeros(3)
    h = sigmoid(X.dot(theta.T))
    delta = h - np.reshape(y, (m, 1))
    # dJ/dtheta_j = (1/m) * sum((h - y) * x_j); note there is no extra sign flip
    for i in range(grad.size):
        grad[i] = (1.0 / m) * delta[:, 0].dot(X[:, i])
    theta.shape = (3,)
    return grad
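Hand-derived gradients are easy to get wrong (the version of compute_grad transcribed above carried a stray sign flip), so it is worth validating them numerically. The following finite-difference check is a sketch added here, not part of the original code:

def gradient_check(theta, X, y, eps=1e-4):
    '''Central finite-difference estimate of the gradient of compute_cost.'''
    numeric = np.zeros_like(theta, dtype=float)
    for i in range(theta.size):
        t_plus = theta.astype(float).copy()
        t_minus = theta.astype(float).copy()
        t_plus[i] += eps
        t_minus[i] -= eps
        numeric[i] = (compute_cost(t_plus, X, y)[0] - compute_cost(t_minus, X, y)[0]) / (2 * eps)
    return numeric

# The two should agree to several decimal places:
print(gradient_check(np.zeros(3), X1, y))
print(compute_grad(np.zeros(3), X1, y))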
from numpy import loadtxt, where
from pylab import scatter, show, legend, xlabel, ylabel

# Read the data
data = loadtxt('/home/Julyedu/data/data2.txt', delimiter=',')
X = data[:, 0:2]
y = data[:, 2]
pos = where(y == 1)
neg = where(y == 0)
scatter(X[pos, 0], X[pos, 1], marker='o', c='b')
scatter(X[neg, 0], X[neg, 1], marker='x', c='r')
xlabel('Microchip Test 1')
ylabel('Microchip Test 2')
legend(['1', '0'])  # positives (y == 1) are plotted first
show()
def map_feature(x1, x2):
    '''
    Feature mapping: polynomial combinations of the two features up to degree 6,
    i.e. X1, X2, X1 ** 2, X2 ** 2, X1*X2, X1*X2 ** 2, and so on...
    '''
    x1.shape = (x1.size, 1)
    x2.shape = (x2.size, 1)
    degree = 6
    mapped_fea = np.ones(shape=(x1[:, 0].size, 1))
    for i in range(1, degree + 1):
        for j in range(i + 1):
            r = (x1 ** (i - j)) * (x2 ** j)
            mapped_fea = np.append(mapped_fea, r, axis=1)
    return mapped_fea
mapped_fea = map_feature(X[:,0], X[:,1])
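For degree 6 the mapping produces 28 columns: the intercept plus sum(i + 1) for i = 1..6, i.e. 27 polynomial terms, so the regularized model below has 28 parameters.

print(mapped_fea.shape)  # (m, 28): intercept column plus 27 polynomial features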
def cost_function_reg(theta, X, y, l):
    '''
    Compute the regularized cost function and its gradient.
    The intercept parameter theta[0] is not regularized.
    '''
    m = X.shape[0]
    theta = np.reshape(theta, (len(theta), 1))
    h = sigmoid(X.dot(theta))
    thetaR = theta[1:, 0]
    J = (1.0 / m) * ((-y.T.dot(np.log(h))) - ((1 - y.T).dot(np.log(1.0 - h)))) \
        + (l / (2.0 * m)) * (thetaR.T.dot(thetaR))
    delta = h - y
    # Gradient of the intercept term (column 0 of X, no regularization)
    sum_delta = delta.T.dot(X[:, 0])
    grad1 = (1.0 / m) * sum_delta
    # Gradient of the remaining, regularized parameters
    XR = X[:, 1:X.shape[1]]
    sum_delta = delta.T.dot(XR)
    grad = (1.0 / m) * (sum_delta + l * thetaR)
    out = np.zeros(shape=(grad.shape[0], grad.shape[1] + 1))
    out[:, 0] = grad1
    out[:, 1:] = grad
    return J[0][0], out.T.flatten()
from scipy.optimize import fmin_bfgs

m, n = X.shape
y.shape = (m, 1)
it = map_feature(X[:, 0], X[:, 1])
# Initialize the parameters
initial_theta = np.zeros(shape=(it.shape[1], 1))
# Set the regularization coefficient to 1
l = 1
cost, grad = cost_function_reg(initial_theta, it, y, l)

# fmin_bfgs wants the cost and the gradient as separate callables
def decorated_cost(theta):
    return cost_function_reg(theta, it, y, l)[0]

def decorated_grad(theta):
    return cost_function_reg(theta, it, y, l)[1]

theta_opt = fmin_bfgs(decorated_cost, initial_theta.ravel(),
                      fprime=decorated_grad, maxiter=400)
print(theta_opt)
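Given the optimizer's result (theta_opt above), training-set predictions follow by thresholding the sigmoid at 0.5. A minimal accuracy check; for l = 1 the original Coursera exercise reports a training accuracy of about 83%:

predictions = (sigmoid(it.dot(theta_opt.reshape(-1, 1))) >= 0.5).astype(int)
print('Train accuracy: %.1f%%' % (100 * np.mean(predictions == y)))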