[英]Neural Network fails on mnist
我用 Python 編寫了一個神經網絡來解決 MNIST 任務。但是在一個 epoch(訓練輪次)之后,錯誤率的變化就非常小(只在小數點后第 6 位有變化),並且在 10000 個 epoch 之后,網絡仍然幾乎沒有學到什麼。我將學習率 eta 設置為 0.05。
import numpy as np
import pickle
import time
class FeedForwardNetwork():
    """Fully connected network with one tanh hidden layer and a tanh output layer.

    Data is laid out one sample per row: inputs have shape
    ``(samples, input_dim)`` and targets ``(samples, output_dim)``.
    Training is full-batch gradient descent on the summed squared error,
    with a patience counter on a held-out validation split.
    """

    def __init__(self, input_dim, hidden_dim, output_dim):
        """Create a network with small random weights in [-0.001, 0.001)."""
        self.input_dim = input_dim
        self.hidden_dim = hidden_dim
        self.output_dim = output_dim
        # Training inputs / training targets; filled by set_training_data().
        self.input_layer = np.array([])
        # Hidden activations of the most recent forward pass.
        self.hidden_layer = np.array([])
        self.output_layer = np.array([])
        self.weights_input_hidden = (2 * np.random.random((input_dim, hidden_dim)) - 1) / 1000
        self.weights_hidden_output = (2 * np.random.random((hidden_dim, output_dim)) - 1) / 1000
        # Held-out samples and their targets; filled by set_training_data().
        self.validation_data = np.array([])
        self.validation_data_solution = np.array([])

    def _tanh(self, x, deriv=False):
        """Return tanh(x), or its derivative at the pre-activation x if deriv is set."""
        if not deriv:
            return np.tanh(x)
        return 1 - np.tanh(x) ** 2

    def _softmax(self, x):
        """Return the softmax of x along its last axis (one distribution per row)."""
        x = np.asarray(x, dtype=float)
        # Subtracting the row maximum leaves the result unchanged but keeps
        # np.exp from overflowing on large inputs.
        shifted = x - np.max(x, axis=-1, keepdims=True)
        exps = np.exp(shifted)
        return exps / np.sum(exps, axis=-1, keepdims=True)

    def set_training_data(self, training_data_input, training_data_target):
        """Split the data 75/25 into training and validation parts and store both.

        The first 75% of the samples (rounded down) become the training
        set, the remainder the validation set.

        Raises:
            ValueError: if inputs and targets differ in length.
        """
        if len(training_data_input) != len(training_data_target):
            raise ValueError("Number of training examples and training targets does not match!")
        # Integer arithmetic avoids float rounding in the split index.
        len_training_data = len(training_data_input) * 75 // 100
        self.input_layer = np.asarray(training_data_input[:len_training_data])
        self.output_layer = np.asarray(training_data_target[:len_training_data])
        # No extra wrapping list: validation arrays keep the same
        # (samples, features) layout as the training arrays.
        self.validation_data = np.asarray(training_data_input[len_training_data:])
        self.validation_data_solution = np.asarray(training_data_target[len_training_data:])

    def save(self, filename):
        """Save both weight matrices into a pickle file."""
        with open(filename, "wb") as network_file:
            pickle.dump(self.weights_input_hidden, network_file)
            pickle.dump(self.weights_hidden_output, network_file)

    def load(self, filename):
        """Load network weights from a pickle file written by save().

        Raises:
            ValueError: if a stored matrix does not have the shape of the
                corresponding matrix of this network.
        """
        # SECURITY: pickle can execute arbitrary code while loading; only
        # open weight files that come from a trusted source.
        with open(filename, "rb") as network_file:
            weights_input_hidden = pickle.load(network_file)
            weights_hidden_output = pickle.load(network_file)
        # Compare full shapes, not just the first dimension, and validate
        # both matrices before replacing either.
        if np.shape(weights_input_hidden) != np.shape(self.weights_input_hidden):
            raise ValueError("File contains weights that does not match the current networks size!")
        if np.shape(weights_hidden_output) != np.shape(self.weights_hidden_output):
            raise ValueError("File contains weights that does not match the current networks size!")
        self.weights_input_hidden = weights_input_hidden
        self.weights_hidden_output = weights_hidden_output

    def measure_error(self, input_data, output_data):
        """Return half the summed squared difference between targets and predictions."""
        residual = np.asarray(output_data) - self.activate(input_data)
        return 0.5 * np.sum(residual ** 2)

    def forward_propagate(self, input_data):
        """Pass the input through both layers and return the output activations.

        The hidden activations are kept in self.hidden_layer so that
        back_propagate() can reuse them.
        """
        self.hidden_layer = self._tanh(np.dot(input_data, self.weights_input_hidden))
        return self._tanh(np.dot(self.hidden_layer, self.weights_hidden_output))

    def activate(self, input_data):
        """Send the given input through the net and return the net's prediction."""
        return self.forward_propagate(input_data)

    def back_propagate(self, input_data, output_data, eta):
        """Run one gradient-descent step on the given batch.

        Args:
            input_data: batch inputs, one sample per row.
            output_data: batch targets, one sample per row.
            eta: learning rate applied to the batch-averaged gradient.

        Raises:
            ValueError: if the batch is empty.
        """
        input_data = np.asarray(input_data)
        output_data = np.asarray(output_data)
        num_of_samples = len(input_data)
        if num_of_samples == 0:
            raise ValueError("Cannot back-propagate an empty batch!")

        output_layer = self.forward_propagate(input_data)
        output_layer_error = output_data - output_layer
        # The layers already hold tanh activations a, so the derivative is
        # 1 - a**2; applying tanh a second time would give a wrong slope.
        output_layer_delta = output_layer_error * (1 - output_layer ** 2)

        # How much did each hidden neuron contribute to the output error?
        hidden_layer_error = output_layer_delta.dot(self.weights_hidden_output.T)
        hidden_layer_delta = hidden_layer_error * (1 - self.hidden_layer ** 2)

        # Average over the batch exactly once. Both changes are computed
        # from the pre-update weights before either matrix is modified.
        hidden_weights_change = input_data.T.dot(hidden_layer_delta) / num_of_samples
        output_weights_change = self.hidden_layer.T.dot(output_layer_delta) / num_of_samples

        self.weights_hidden_output += output_weights_change * eta
        self.weights_input_hidden += hidden_weights_change * eta

    def batch_train(self, epochs, eta, patience=10):
        """Train in batch mode: weights are updated once per epoch on all training data.

        Args:
            epochs: maximum number of epochs.
            eta: learning rate.
            patience: total number of epochs in which the validation error
                may fail to improve before training is aborted.
        """
        validation_error = self.measure_error(self.validation_data, self.validation_data_solution)
        for epoch in range(epochs):
            self.back_propagate(self.input_layer, self.output_layer, eta)
            validation_error_new = self.measure_error(self.validation_data, self.validation_data_solution)
            if validation_error_new < validation_error:
                validation_error = validation_error_new
            else:
                patience -= 1
                # "<=" so that a patience of 0 (or less) still terminates.
                if patience <= 0:
                    print("Abort Training. Overfitting has started! Epoch: {0}. Error: {1}".format(epoch, validation_error_new))
                    return
            print("Epoch: {0}, Error: {1}".format(epoch, validation_error))
        # NOTE(review): the original indentation was lost; the save is assumed
        # to run once after the last epoch rather than inside the loop.
        self.save("Network_Mnist.net")
謝謝!
Epoch: 1813, Error: 7499.944371111551
Epoch: 1814, Error: 7499.944368765047
我猜您可能想添加一個使用交叉熵損失的 softmax 輸出層。當輸入為負時,tanh 會輸出負值,這顯然不適合作為輸出層,因為概率應在 [0,1] 范圍內。
這是我實施的玩具前饋NN,可能對您有所幫助。
聲明:本站的技術帖子網頁,遵循CC BY-SA 4.0協議,如果您需要轉載,請注明本站網址或者原文地址。任何問題請咨詢:yoyou2525@163.com.