import numpy as np
def sigmoid(x):
    """Logistic function 1/(1 + e^-x); the result is a numpy matrix."""
    exp_term = np.asmatrix(np.exp(-x))
    return 1.0 / (1 + exp_term)
def graD(X,y,alpha,s0,numda):
    # Regularized logistic-regression gradient descent, as posted in the question.
    # NOTE(review): this version is known to be defective (see the answer below):
    #   * theta0/theta1 are *views* into theta, so the in-place "-=" steps are not
    #     a simultaneous update — the theta1 step sees the already-updated theta0.
    #   * "s" below is a mean-gradient quantity, not the logistic cost
    #     -(y*log(h) + (1-y)*log(1-h))/m, so the stopping criterion is unreliable.
    m=np.size(X,0)   # number of training examples
    n=np.size(X,1)   # number of columns (intercept + features)
    X0=X[:,0]        # intercept column (not regularized)
    X1=X[:,1:]       # remaining feature columns
    theta=np.asmatrix(np.zeros(np.size(X,1))).T  # (n, 1) parameter vector
    s=100            # convergence measure, initialised above any threshold
    lit=0            # iteration counter
    while abs(s)>s0 and lit<=10000:
        # BUG(review): these are views into theta, not independent copies.
        theta0=theta[0]
        theta1=theta[1:]
        theta0-=(float(alpha)/m)*X0.T*(sigmoid(X*theta)-y)
        theta1-=float(alpha)*((1.0/m)*X1.T*(sigmoid(X*theta)- y)+float(numda)/m*theta1)
        theta=np.vstack((np.asmatrix(theta0),np.asmatrix(theta1)))
        lit+=1
        s=sum((float(1.0)/m)*X.T*(sigmoid(X*theta)-y))/float(n)
    return theta
This is a logistic regression using the simple sigmoid function 1/(1+e^(-t)). I cannot figure out the problem, mainly in the function 'graD', which performs the regularized gradient descent. The result is not correct, as shown below:
lg(X,y,0.01,0.1,30)
Out[137]:
matrix([[1000.10539375],
[ 49.33333333]])
The data I input is: (for X): 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15, and (for y): 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1.
You have some errors in your code.
When calculating new theta values you need to use a simultaneous update. In your case, by changing theta0 you also change theta, which is then used in the theta1 calculation. That is wrong. You may need to use two temporary variables (or use a vectorized solution).
The cost function is also not correct. It should consist of two parts: y*log(h) and (1-y)*log(1-h).
As far as I know the cost function cannot be negative, so you don't need to calculate the absolute value of it.
The regularization looks wrong to me too.
Here is my code, it works for me.
import numpy as np
from numpy import *
import matplotlib.pyplot as plt
def sigmoid(x):
    """Element-wise logistic function 1/(1 + exp(-x)), returned as a matrix."""
    denominator = 1 + np.asmatrix(np.exp(-x))
    return 1.0 / denominator
def graD(X, y, alpha, s0, numda):
    """Regularized logistic-regression gradient descent.

    Parameters
    ----------
    X : np.matrix, shape (m, n)
        Design matrix; the first column is assumed to be the intercept (ones).
    y : np.matrix, shape (m, 1)
        0/1 labels.
    alpha : float
        Learning rate.
    s0 : float
        Cost threshold used as the convergence criterion.
    numda : float
        Regularization strength.
        NOTE(review): the theta1 step below carries no lambda term, so this
        argument is effectively unused — confirm whether that is intended.

    Returns the fitted theta column vector, plots the cost history, and
    prints theta and the 1-D decision boundary -theta0/theta1.
    """
    m = np.size(X, 0)
    X0 = X[:, 0]        # intercept column
    X1 = X[:, 1:]       # feature columns
    theta = np.asmatrix(np.zeros(np.size(X, 1))).T
    s = 100             # current cost, initialised above any sensible threshold
    lit = 0             # iteration counter
    s_history = []
    while s > s0 and lit <= 10000:
        theta0 = theta[0]
        theta1 = theta[1:]
        sig = sigmoid(X * theta)   # hypothesis h(x) for the *current* theta
        # Simultaneous update: both steps use the same `sig` and old theta.
        theta0_temp = theta0 - (float(alpha) / m) * X0.T * (sig - y)
        theta1_temp = theta1 - (float(alpha) / m) * X1.T * (sig - y)
        theta = np.vstack((np.asmatrix(theta0_temp), np.asmatrix(theta1_temp)))
        lit += 1
        # Logistic cost: -(y*log(h) + (1-y)*log(1-h)) / m
        part1 = np.multiply(y, np.log(sig))
        part2 = np.multiply((1 - y), np.log(1 - sig))
        s = (-part1 - part2).sum() / m
        s_history.append(s)
    plt.plot(s_history)
    plt.title("Cost function")
    plt.grid()
    plt.show()
    # Fixed: Python 2 `print theta` statement is a SyntaxError under Python 3;
    # use the print() function instead.
    print(theta)
    print(-theta[0] / theta[1])
    return theta
# Main module: load the 1-D training data, add an intercept column, and fit.
# `loadtxt` comes from the `from numpy import *` at the top of the file.
_f = loadtxt('data/ex2data2_n_1.txt', delimiter=',')
_X, _y = _f[:,[0]], _f[:,[1]]   # first column: feature, second column: 0/1 label
_m = np.shape(_X)[0]            # number of training examples
# add a column of 1
_X = np.hstack((np.matrix(np.ones((_m, 1))),_X))
_y = np.matrix(_y)
_alpha = 0.01                   # learning rate
_n= np.shape(_X)[1]
_w = np.matrix(np.zeros((_n, 1)))   # NOTE(review): _w appears unused below
graD(_X, _y, _alpha, 0.1, 0.1)  # s0=0.1 (cost threshold), numda=0.1
Output:
theta =
[[-5.51133636]
[ 0.77301063]]
-theta0/theta1 =
[[ 7.12970317]] #this number is the decision boundary for the 1-D case
The cost function goes down as it should do:
The technical post webpages of this site follow the CC BY-SA 4.0 protocol. If you need to reprint, please indicate the site URL or the original address. For any questions, please contact: yoyou2525@163.com.