I'm relatively new to machine learning, and as a starter project, I decided to implement my own neural network from scratch in Python using NumPy. As such, I have manually implemented methods for forward propagation, backpropagation, and calculating function derivatives.
For my testing data, I wrote a function that generates values of sin(x). When I finally create and train my network, my outputs fluctuate quite a lot with each trial and are significantly off the true values(although they are a decent improvement over the initial predictions).
I have tried adjusting quite a few settings, including the learning rate, number of neurons, number of layers, training iterations, and activation function, but I still end up with a squared cost of around 0.1 over my input data.
I think my derivative functions and chain rule expressions are correct since when I use just one input sample I get a near-perfect answer.
Adding more input data, however, significantly reduces the accuracy of the network.
Do you guys have any suggestions for how to improve this network, or is there anything I'm doing wrong currently?
My code:
import numpy as np
#Generate input data for the network
def inputgen():
inputs=[]
outputs=[]
i=0.01
for x in range(10000):
inputs.append([round(i,7)])
outputs.append([np.sin(i)]) #output is sin(x)
i+=0.0001
return [inputs,outputs]
#set training input and output
inputs = np.array(inputgen()[0])
outputs = np.array(inputgen()[1])
#sigmoid activation function and derivative
def sigmoid(x):
return 1/(1+np.exp(-x))
def sigmoid_derivative(x):
return sigmoid(x)*(1-sigmoid(x))
#tanh activation function and derivative
def tanh(x):
return np.tanh(x)
def tanh_derivative(x):
return 1-((tanh(x))**2)
#Layer class
class Layer:
def __init__(self,num_neurons,num_inputs,inputs):
self.num_neurons = num_neurons #number of neurons in hidden layers
self.num_inputs = num_inputs #number of input neurons(1 in the case of testing data)
self.inputs = inputs
self.weights = np.random.rand(num_inputs,num_neurons)*np.sqrt(1/num_inputs) #weights initialized by Xavier function
self.biases = np.zeros((1,num_neurons)) #biases initialized as 0
self.z = np.dot(self.inputs,self.weights)+self.biases #Cacluate z
self.a = tanh(self.z) #Calculate activation
self.dcost_a = [] #derivative of cost with respect to activation
self.da_z = [] #derivative of activation with respect to z
self.dz_w = [] #derivative of z with respect to weight
self.dcost_w = [] #derivative of cost with respect to weight
self.dcost_b = [] #derivative of cost with respect to bias
#functions used in forwardpropagation
def compute_z(self):
self.z = np.dot(self.inputs,self.weights)+self.biases
return self.z
def activation(self):
self.a = tanh(self.compute_z())
def forward(self):
self.activation()
#Network class
class Network:
def __init__(self,num_layers,num_neurons,num_inputs,inputs,num_outputs,outputs):
self.learningrate = 0.01 #learning rate
self.num_layers=num_layers #number of hidden layers
self.num_neurons=num_neurons #number of neurons in hidden layers
self.num_inputs = num_inputs #number of input neurons
self.inputs=inputs
self.expected_outputs=outputs
self.layers=[]
for x in range(num_layers):
if x==0:
self.layers.append(Layer(num_neurons,num_inputs,inputs)) #Initial layer with given inputs
else:
#Other layers have an input which is the activation of previous layer
self.layers.append(Layer(num_neurons,len(self.layers[x-1].a[0]),self.layers[x-1].a))
self.prediction = Layer(num_outputs,num_neurons,self.layers[-1].a) #prediction
self.layers.append(self.prediction)
self.cost = (self.prediction.a-self.expected_outputs)**2 #cost
#forwardpropagation
def forwardprop(self):
for x in range(self.num_layers+1):
if(x!=0):
self.layers[x].inputs=self.layers[x-1].a
self.layers[x].forward()
self.prediction=self.layers[-1] #update prediction value
def backprop(self):
self.cost = (self.prediction.a-self.expected_outputs)**2
for x in range(len(self.layers)-1,-1,-1):
if(x==len(self.layers)-1):
dcost_a = 2*(self.prediction.a-self.expected_outputs) #derivative of cost with respect to activation for output layer
else:
#derivative of cost with respect to activation for hidden layers(chain rule)
dcost_a=np.zeros((len(self.layers[x].inputs),self.num_neurons)).T
dcost_a1=self.layers[x+1].dcost_a.T
da_z1=self.layers[x+1].da_z.T
dz_a=(self.layers[x+1].weights).T
for z in range(len(dcost_a1)):
dcost_a+=((dcost_a1[z])*da_z1)
for j in range(len(dcost_a)):
dcost_a[j]*=dz_a[z][j]
dcost_a=dcost_a.T
self.layers[x].dcost_a=dcost_a
#derivative of activation with respect to z
da_z = tanh_derivative(self.layers[x].z)
self.layers[x].da_z=da_z
#derivative of z with respect to weights
dz_w = []
if x!=0:
dz_w=self.layers[x-1].a
else:
dz_w=self.inputs
self.layers[x].dz_w=dz_w
#change weights and biases
for x in range(len(self.layers)-1,-1,-1):
#Average each of the derivatives over all training samples
self.layers[x].dcost_a=np.average(self.layers[x].dcost_a,axis=0)
self.layers[x].da_z=np.average(self.layers[x].da_z,axis=0)
self.layers[x].dz_w=(np.average(self.layers[x].dz_w,axis=0)).T
self.layers[x].dcost_w = np.zeros((self.layers[x].weights.shape))
self.layers[x].dcost_b = self.layers[x].dcost_a*self.layers[x].da_z
for v in range(len(self.layers[x].dz_w)):
self.layers[x].dcost_w[v] = (self.layers[x].dcost_a*self.layers[x].da_z)*self.layers[x].dz_w[v]
#update weights and biases
self.layers[x].weights-=(self.layers[x].dcost_w)*self.learningrate
self.layers[x].biases-=(self.layers[x].dcost_b)*self.learningrate
#train the network
def train(self):
for x in range(1000):
self.backprop()
self.forwardprop()
Network1 = Network(3,3,1,inputs,1,outputs)
Network1.train()
print(Network1.prediction.a)
Sample input:
[[0.01 ]
[0.0101]
[0.0102]
...
[1.0097]
[1.0098]
[1.0099]]
Sample output:
[[0.37656753]
[0.37658777]
[0.37660802]
...
[0.53088048]
[0.53089046]
[0.53090043]]
Expected output:
[[0.00999983]
[0.01009983]
[0.01019982]
...
[0.84667225]
[0.84672546]
[0.84677865]]
I would keep track of the cost_history and update your learning rate as such.
If you have been - getting closer to the actual value, increase learning rate by 5% - getting further away, decrease the learning rate by 50%
def update_learning_rate(self):
if(len(self.cost_history) < 2):
return
if(self.cost_history[0] > self.cost_history[1]):
self.learning_rate /= 2
else:
self.learning_rate *= 1.05
this should actually yield surprisingly better results
what usually happens is that you might be getting stuck in one of the local minima (d) and not the absolute minimum (b). Ignore the labels, this is just a random photo I found online.
Few things I would recommend to try:
The technical post webpages of this site follow the CC BY-SA 4.0 protocol. If you need to reprint, please indicate the site URL or the original address.Any question please contact:yoyou2525@163.com.