@hanxiaoyang 2016-05-07

julyedu (七月算法) Regression Examples

Linear Regression and Logistic Regression


Gradient Descent
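
For reference, the update rule that the code below implements: with hypothesis $h_\theta(x) = \theta^T x$, batch gradient descent repeatedly updates every parameter $j$ simultaneously,

$$\theta_j := \theta_j - \alpha \frac{1}{m} \sum_{i=1}^{m} \left(h_\theta(x^{(i)}) - y^{(i)}\right) x_j^{(i)},$$

where $\alpha$ is the learning rate and $m$ the number of samples; the cost being minimized is $J(\theta) = \frac{1}{2m}\sum_{i=1}^{m}\left(h_\theta(x^{(i)}) - y^{(i)}\right)^2$.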

Linear regression code

from __future__ import division
import numpy as np
import matplotlib.pyplot as plt

# Read the raw data into a matrix, using the numpy scientific computing package
data = np.genfromtxt('ex1data1.txt', delimiter=',')
x = data[:, 0]
y = data[:, 1]
m = y.size
# Design matrix: a column of ones (the intercept term) plus the input feature
a = np.ones((m, 2))
a[:, 1] = x

# Compute the value of the cost function
def cost(x, y, theta=np.zeros((2, 1))):
    """
    Compute and return the cost.
    theta: linear regression parameters
    x: input (design matrix)
    y: target
    """
    m = len(x)
    J = 1 / (2 * m) * sum((x.dot(theta).flatten() - y) ** 2)
    return J

# Gradient descent to minimize the cost function
def gradientDesc(x, y, theta=np.zeros((2, 1)), alpha=.01, iterations=1500):
    """
    Gradient descent for univariate linear regression.
    """
    m = y.size
    J = []
    for numbers in range(iterations):
        a = theta[0][0] - alpha * (1 / m) * sum((x.dot(theta).flatten() - y) * x[:, 0])
        b = theta[1][0] - alpha * (1 / m) * sum((x.dot(theta).flatten() - y) * x[:, 1])
        theta[0][0], theta[1][0] = a, b
        print(theta[0][0])
        print(theta[1][0])
        J.append(cost(x, y, theta))
        print('Cost: ' + str(J[-1]))
    return theta

# Scatter plot of the data, with an option to save the figure
def scatterPlot(x, y, yp=None, savePng=False):
    plt.xlabel('Population of City in 10,000s')
    plt.ylabel('Profit in $10,000s')
    plt.scatter(x, y, marker='x')
    if yp is not None:
        plt.plot(x, yp)
    if not savePng:
        plt.show()
    else:
        name = input('Name Figure File: ')
        plt.savefig(name + '.png')
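
A minimal usage sketch of the functions above (assuming ex1data1.txt, listed below, is in the working directory; yp is just a hypothetical name for the fitted values):

theta = gradientDesc(a, y)      # fit on the design matrix built above
yp = a.dot(theta).flatten()     # fitted line evaluated at the training inputs
scatterPlot(x, y, yp)           # scatter of the data plus the fitted line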

Data (ex1data1.txt)

6.1101,17.592
5.5277,9.1302
8.5186,13.662
7.0032,11.854
5.8598,6.8233
8.3829,11.886
7.4764,4.3483
8.5781,12
6.4862,6.5987
5.0546,3.8166
5.7107,3.2522
14.164,15.505
5.734,3.1551
8.4084,7.2258
5.6407,0.71618
5.3794,3.5129
6.3654,5.3048
5.1301,0.56077
6.4296,3.6518
7.0708,5.3893
6.1891,3.1386
20.27,21.767
5.4901,4.263
6.3261,5.1875
5.5649,3.0825
18.945,22.638
12.828,13.501
10.957,7.0467
13.176,14.692
22.203,24.147
5.2524,-1.22
6.5894,5.9966
9.2482,12.134
5.8918,1.8495
8.2111,6.5426
7.9334,4.5623
8.0959,4.1164
5.6063,3.3928
12.836,10.117
6.3534,5.4974
5.4069,0.55657
6.8825,3.9115
11.708,5.3854
5.7737,2.4406
7.8247,6.7318
7.0931,1.0463
5.0702,5.1337
5.8014,1.844
11.7,8.0043
5.5416,1.0179
7.5402,6.7504
5.3077,1.8396
7.4239,4.2885
7.6031,4.9981
6.3328,1.4233
6.3589,-1.4211
6.2742,2.4756
5.6397,4.6042
9.3102,3.9624
9.4536,5.4141
8.8254,5.1694
5.1793,-0.74279
21.279,17.929
14.908,12.054
18.959,17.054
7.2182,4.8852
8.2951,5.7442
10.236,7.7754
5.4994,1.0173
20.341,20.992
10.136,6.6799
7.3345,4.0259
6.0062,1.2784
7.2259,3.3411
5.0269,-2.6807
6.5479,0.29678
7.5386,3.8845
5.0365,5.7014
10.274,6.7526
5.1077,2.0576
5.7292,0.47953
5.1884,0.20421
6.3557,0.67861
9.7687,7.5435
6.5159,5.3436
8.5172,4.2415
9.1802,6.7981
6.002,0.92695
5.5204,0.152
5.0594,2.8214
5.7077,1.8451
7.6366,4.2959
5.8707,7.2029
5.3054,1.9869
8.2934,0.14454
13.394,9.0551
5.4369,0.61705

Logistic Regression
https://github.com/tarlen5/coursera_ml/tree/master/unit6
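
For reference, the model the code below implements: the hypothesis is $h_\theta(x) = g(\theta^T x)$ with the sigmoid $g(z) = \frac{1}{1+e^{-z}}$, and the (unregularized) cost over $m$ samples is

$$J(\theta) = \frac{1}{m}\sum_{i=1}^{m}\left[-y^{(i)}\log h_\theta(x^{(i)}) - (1-y^{(i)})\log\left(1-h_\theta(x^{(i)})\right)\right],$$

whose gradient is $\frac{\partial J}{\partial \theta_j} = \frac{1}{m}\sum_{i=1}^{m}\left(h_\theta(x^{(i)}) - y^{(i)}\right)x_j^{(i)}$.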

from numpy import loadtxt, where
from pylab import scatter, show, legend, xlabel, ylabel

# Read the data
data = loadtxt('/home/Julyedu/data/data1.txt', delimiter=',')
X = data[:, 0:2]
y = data[:, 2]
pos = where(y == 1)
neg = where(y == 0)
# Positive samples (y == 1) are plotted first, so they take the first legend entry
scatter(X[pos, 0], X[pos, 1], marker='o', c='b')
scatter(X[neg, 0], X[neg, 1], marker='x', c='r')
xlabel('Feature1/Exam 1 score')
ylabel('Feature2/Exam 2 score')
legend(['Pass', 'Fail'])
show()


(figure: scatter plot of data1)

from numpy import e, log, reshape, transpose, zeros

def sigmoid(X):
    '''Define the sigmoid function'''
    den = 1.0 + e ** (-1.0 * X)
    gz = 1.0 / den
    return gz

def compute_cost(theta, X, y):
    '''Compute the value of the cost function (and its gradient)'''
    m = X.shape[0]                           # number of training samples
    theta = reshape(theta, (len(theta), 1))  # parameters theta as a column vector
    y = reshape(y, (m, 1))                   # targets as a column vector
    h = sigmoid(X.dot(theta))
    J = (1. / m) * (-transpose(y).dot(log(h)) - transpose(1 - y).dot(log(1 - h)))
    grad = transpose((1. / m) * transpose(h - y).dot(X))
    return J[0][0], grad

def compute_grad(theta, X, y):
    '''Compute the gradient'''
    m = X.shape[0]
    theta.shape = (1, 3)
    grad = zeros(3)
    h = sigmoid(X.dot(theta.T)).flatten()
    delta = h - y.flatten()
    for i in range(grad.size):
        sumdelta = delta.dot(X[:, i])
        grad[i] = (1.0 / m) * sumdelta
    theta.shape = (3,)
    return grad
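
A minimal check of these functions on data1, using X and y from the loading snippet above; Xi and theta0 are hypothetical names for the design matrix (intercept column prepended, giving the 3 features compute_grad assumes) and the zero-initialized parameters:

from numpy import append, ones, zeros

m = X.shape[0]
Xi = append(ones((m, 1)), X, axis=1)   # prepend the intercept column -> 3 features
theta0 = zeros(3)
J, grad = compute_cost(theta0, Xi, y)
print(J)                               # initial cost at theta = 0
print(compute_grad(theta0, Xi, y))     # gradient at theta = 0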



from numpy import loadtxt, where
from pylab import scatter, show, legend, xlabel, ylabel

# Read the data
data = loadtxt('/home/Julyedu/data/data2.txt', delimiter=',')
X = data[:, 0:2]
y = data[:, 2]
pos = where(y == 1)
neg = where(y == 0)
scatter(X[pos, 0], X[pos, 1], marker='o', c='b')
scatter(X[neg, 0], X[neg, 1], marker='x', c='r')
xlabel('Microchip Test 1')
ylabel('Microchip Test 2')
legend(['1', '0'])
show()

(figure: scatter plot of data2)

from numpy import append, ones

def map_feature(x1, x2):
    '''
    Feature mapping: build polynomial combination features
    1, X1, X2, X1**2, X1*X2, X2**2, X1**3, ..., up to degree 6
    '''
    x1.shape = (x1.size, 1)
    x2.shape = (x2.size, 1)
    degree = 6
    mapped_fea = ones(shape=(x1[:, 0].size, 1))   # start with the bias column
    for i in range(1, degree + 1):
        for j in range(i + 1):
            r = (x1 ** (i - j)) * (x2 ** j)
            mapped_fea = append(mapped_fea, r, axis=1)
    return mapped_fea

mapped_fea = map_feature(X[:, 0], X[:, 1])
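
With degree 6 this produces 1 + 2 + 3 + ... + 7 = 28 columns (the bias column plus every monomial x1^a * x2^b with 1 <= a + b <= 6), so the mapped matrix has shape (m, 28):

print(mapped_fea.shape)   # (m, 28) for degree = 6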


from numpy import zeros
from scipy.optimize import fmin_bfgs

def cost_function_reg(theta, X, y, l):
    '''
    Compute the regularized cost function and its gradient
    '''
    m = X.shape[0]
    theta = theta.reshape((-1, 1))       # optimizers pass theta as a flat array
    h = sigmoid(X.dot(theta))
    thetaR = theta[1:, 0]                # theta without the bias term (not regularized)
    J = (1.0 / m) * ((-y.T.dot(log(h))) - ((1 - y.T).dot(log(1.0 - h)))) \
        + (l / (2.0 * m)) * (thetaR.T.dot(thetaR))
    delta = h - y
    sum_delta = delta.T.dot(X[:, 0])     # gradient for the bias term, no regularization
    grad1 = (1.0 / m) * sum_delta
    XR = X[:, 1:X.shape[1]]
    sum_delta = delta.T.dot(XR)
    grad = (1.0 / m) * (sum_delta + l * thetaR)   # regularized gradient for the rest
    out = zeros(shape=(grad.shape[0], grad.shape[1] + 1))
    out[:, 0] = grad1
    out[:, 1:] = grad
    return J.flatten(), out.T.flatten()

m, n = X.shape
y.shape = (m, 1)
it = map_feature(X[:, 0], X[:, 1])
# Initialize the parameters
initial_theta = zeros(shape=(it.shape[1], 1))
# Set the regularization coefficient to 1
l = 1
cost, grad = cost_function_reg(initial_theta, it, y, l)

# fmin_bfgs needs a scalar objective, so return only the cost
def decorated_cost(theta):
    cost, grad = cost_function_reg(theta, it, y, l)
    return cost[0]

print(fmin_bfgs(decorated_cost, initial_theta, maxiter=500))
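
A follow-up sketch: capturing the theta returned by fmin_bfgs (theta_opt is a hypothetical name, rerunning the same call as above) lets us check training-set predictions and accuracy:

theta_opt = fmin_bfgs(decorated_cost, initial_theta, maxiter=500)
p = (sigmoid(it.dot(theta_opt.reshape((-1, 1)))) >= 0.5)   # predicted labels
print('Train accuracy: %f' % ((p == y).mean() * 100))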

