I wrote the following code with NumPy (theta and X are NumPy arrays):
def CostRegFunction(X, y, theta, lambda_):
    """Compute the regularized linear-regression cost and its gradient.

    A bias column of ones is prepended to ``X`` inside this function, so
    ``theta`` must hold one more entry than ``X`` has columns. The bias
    weight ``theta[0]`` is excluded from the regularization term.

    Args:
        X: 2-D array-like of shape (m, n) with the raw features (no bias
            column).
        y: The m target values, either flat or as an (m, 1) column.
        theta: The n + 1 weights, either flat or as an (n + 1, 1) column.
        lambda_: L2 regularization strength.

    Returns:
        A tuple ``(J, grad_)``: ``J`` is the scalar cost and ``grad_`` is a
        list of n + 1 floats, the partial derivative of ``J`` with respect
        to each weight.
    """
    X = np.asarray(X, dtype=float)
    m = len(X)
    # add bias unit
    X = np.concatenate((np.ones((m, 1)), X), 1)
    # Flatten both vectors so flat and column inputs give the same result
    # and the residual can never broadcast into an (m, m) matrix.
    weights = np.asarray(theta, dtype=float).ravel()
    targets = np.asarray(y, dtype=float).ravel()
    residual = np.dot(X, weights) - targets
    J = (1 / (2 * m)) * np.sum(residual ** 2) \
        + (lambda_ / (2 * m)) * np.sum(weights[1:] ** 2)
    grad = np.dot(X.T, residual) / m
    # The bias term is not regularized, so only entries 1.. get the penalty.
    grad[1:] += (lambda_ / m) * weights[1:]
    return J, list(grad)
def TrainLinearReg(X, y, theta, lambda_, alpha, iter):
    """Fit regularized linear regression by batch gradient descent.

    Args:
        X: Feature matrix, passed unchanged to ``CostRegFunction``.
        y: Target values, passed unchanged to ``CostRegFunction``.
        theta: Initial weights. They are copied, so the caller's object is
            left untouched.
        lambda_: L2 regularization strength.
        alpha: Learning rate (step size).
        iter: Number of gradient-descent iterations to run.

    Returns:
        A tuple ``(theta, JHistory)``: the learned weights as a float array
        with the same shape as the input ``theta``, and the list of cost
        values recorded before each update.
    """
    # Work on a float copy: updating in place would modify the caller's array
    # and, for an integer-typed theta, silently truncate every update.
    theta = np.array(theta, dtype=float)
    JHistory = []
    for _ in range(iter):
        # Pass the lambda_ argument; the previous code read a global Lambda_.
        J, grad = CostRegFunction(X, y, theta, lambda_)
        JHistory.append(J)
        # grad was computed from the old theta, so this is a simultaneous
        # update of all weights.
        for j, partial in enumerate(grad):
            theta[j] -= alpha * partial
    return theta, JHistory
# Run 50 gradient-descent iterations with a learning rate of 0.01.
# NOTE(review): X, y, th and Lambda_ are not defined in the visible snippet;
# presumably they are set up earlier by the caller.
Theta, JH = TrainLinearReg(X, y, th, Lambda_, 0.01, 50)
But when I try to learn theta, this code makes theta and the value of J grow enormously. For example, in the first iteration grad = [-15.12452, 598.435436], which is correct, and J is 303.3255. In the second iteration grad = [10.23566, -3646.2345] and J = 7924, and so on: J grows faster and faster, although with linear regression it should decrease.
But if I use the normal equation, it gives me a good Theta.
What is wrong with this code?
Edited on Oct 17th.
I rewrote parts of the code using the NumPy library.
All vectors are now NumPy column arrays.
import numpy as np
from copy import deepcopy as dc
from matplotlib import pyplot as plt
# Short alias for NumPy's norm function, used by the cost computation.
_norm = np.linalg.norm
def CostRegFunction(X, y, theta, lambda_):
    """Compute the regularized linear-regression cost and its gradient.

    ``X`` must already include the leading bias column of ones; no column is
    added here. The bias weight ``theta[0]`` is excluded from the
    regularization term.

    Args:
        X: 2-D array-like of shape (m, n), bias column included.
        y: The m target values, as an (m, 1) column (a flat vector is also
            accepted).
        theta: The n weights, as an (n, 1) column (a flat vector is also
            accepted).
        lambda_: L2 regularization strength.

    Returns:
        A tuple ``(J, grad_)``: ``J`` is the scalar cost and ``grad_`` is an
        array with the same shape as ``theta`` holding the partial derivative
        of ``J`` with respect to each weight.
    """
    X = np.asarray(X, dtype=float)
    m = len(X)
    theta_shape = np.shape(theta)
    # Force column vectors so that H - y can never broadcast to (m, m).
    weights = np.asarray(theta, dtype=float).reshape(-1, 1)
    targets = np.asarray(y, dtype=float).reshape(-1, 1)
    residual = np.dot(X, weights) - targets
    J = (1 / (2 * m)) * np.sum(residual ** 2) \
        + (lambda_ / (2 * m)) * np.sum(weights[1:] ** 2)
    # One matrix product pairs every feature column with the residual; the
    # earlier loop used column 1 for all non-bias weights.
    grad_ = np.dot(X.T, residual) / m
    # The bias term is not regularized, so only entries 1.. get the penalty.
    grad_[1:] += (lambda_ / m) * weights[1:]
    return J, grad_.reshape(theta_shape)
def TrainLinearReg(X, y, theta, lambda_, alpha, iter):
    """Fit regularized linear regression by batch gradient descent.

    Args:
        X: 2-D NumPy array of raw features; a bias column of ones is
            prepended here, once, before the loop starts.
        y: Target values, passed unchanged to ``CostRegFunction``.
        theta: Initial weights.
        lambda_: L2 regularization strength.
        alpha: Learning rate (step size).
        iter: Number of gradient-descent iterations to run.

    Returns:
        A tuple ``(theta, JHistory)``: the final weights and the list of cost
        values recorded before each update.
    """
    sample_count = X.shape[0]
    bias_column = np.ones((sample_count, 1))
    # Add the bias unit up front so it is not rebuilt on every iteration.
    X = np.concatenate((bias_column, X), axis=1)

    JHistory = []
    for _ in range(iter):
        cost, gradient = CostRegFunction(X, y, theta, lambda_)
        JHistory.append(cost)
        step = alpha * gradient
        theta = theta - step
    return theta, JHistory
Then I generated a simple set of x-y polynomial data with white noise and fitted the polynomial using the TrainLinearReg
function.
# Two noisy feature columns built from the same 100-point grid on [0, 10]:
# the grid itself and its square.
_grid = np.linspace(0, 10, 100)
_linear_feature = (_grid + np.random.normal(0, 0.01, 100)).reshape(-1, 1)
_square_feature = (_grid ** 2 + np.random.normal(0, 0.01, 100)).reshape(-1, 1)
x = np.concatenate((_linear_feature, _square_feature), axis=1)

# Targets follow y = 2 - 3*x0 - 2*x1 plus Gaussian noise (sigma = 3).
_target_noise = np.random.normal(0, 3, [100, 1])
y = 2 + -3 * x[:, 0:1] + _target_noise - 2 * x[:, 1:2]

# Initial weights as a column vector, followed by the training settings.
th = np.array([[1], [2], [3]])
alpha = 0.001
lambda_ = 0.1

# Train on a deep copy so that th itself is not handed to the trainer.
Theta, JH = TrainLinearReg(x, y, dc(th), lambda_, alpha, 10000)
What I get is the following.
# Scatter the noisy samples against the first feature column.
plt.plot(x[:, 0], y, 'o', alpha=0.5, label='Original Data')

# Evaluate the fitted quadratic on a coarse 10-point grid and draw it.
x2 = np.linspace(0, 10, 10)
_fitted = Theta[0] + x2 * Theta[1] + x2 ** 2 * Theta[2]
plt.plot(x2, _fitted, '-', color='black', lw=1.5, alpha=0.8,
         label='Fitted Curve')

# Label both axes and show the legend.
_axes = plt.gca()
_axes.set_xlabel('x')
_axes.set_ylabel('y')
plt.legend()
Output >> Theta = array([[ 1.29259285],
[-2.97763304],
[-1.98758321]])
I hope I have been of some help.
Best regards, Gabriel
The technical posts on this site are licensed under CC BY-SA 4.0. If you reprint them, please cite the site URL or the original address. For any questions, please contact: yoyou2525@163.com.