[英]Python ReLu activation function desn't work
我的第一个神经网络使用了S型激活函数,并且运行良好。 现在我想切换到更高级的激活功能(ReLu)。 但是使用ReLu,我的NN根本不起作用。 90%的错误,而使用S形时则有4%的错误。 我在代码中找不到错误。 帮我。
class NeuralNetwork:
def __init__(self, input_nodes, hidden_nodes, output_nodes, learning_rate = 0.1):
self.input_nodes = input_nodes
self.hidden_nodes = hidden_nodes
self.output_nodes = output_nodes
self.learning_rate = learning_rate
self.weights_ih = np.random.normal(0.0, pow(input_nodes, -0.5), (hidden_nodes, input_nodes))
self.weights_ho = np.random.normal(0.0, pow(hidden_nodes, -0.5), (output_nodes, hidden_nodes))
self.bias_h = np.random.normal(0.0, pow(1, -0.5), (hidden_nodes, 1))
self.bias_o = np.random.normal(0.0, pow(1, -0.5), (output_nodes, 1))
def activation_function(self, x):
return x * (x > 0)
def activation_function_d(self, x):
return 1 * (x >= 0)
def train(self, inputs_list, targets_list):
inputs = np.array(inputs_list, ndmin=2).T
targets = np.array(targets_list, ndmin=2).T
# Feedforward
hidden_inputs = np.dot(self.weights_ih, inputs) + self.bias_h
hidden = self.activation_function(hidden_inputs)
output_inputs = np.dot(self.weights_ho, hidden) + self.bias_o
outputs = self.activation_function(output_inputs)
# Calculate errors
output_errors = targets - outputs
hidden_errors = np.dot(self.weights_ho.T, output_errors)
# Calculate gradients
output_gradient = output_errors * self.activation_function_d(output_inputs) * self.learning_rate
hidden_gradient = hidden_errors * self.activation_function_d(hidden_inputs) * self.learning_rate
# Calculate deltas
output_deltas = np.dot(output_gradient, hidden.T)
hidden_deltas = np.dot(hidden_gradient, inputs.T)
# Adjust weights and biases by deltas and gradients
self.weights_ho += output_deltas
self.weights_ih += hidden_deltas
self.bias_o += output_gradient
self.bias_h += hidden_gradient
def predict(self, inputs_list):
inputs = np.array(inputs_list, ndmin=2).T
hidden = self.activation_function(np.dot(self.weights_ih, inputs) + self.bias_h)
outputs = self.activation_function(np.dot(self.weights_ho, hidden) + self.bias_o)
return outputs.flatten().tolist()
和培训代码:
with open('mnist_train.csv') as train_file:
for str in train_file:
data = [int(char) for char in str.split(',')]
inputs = data[1:]
targets = [1 if i == data[0] else 0 for i in range(10)]
nn.train(inputs, targets)
无论您要做什么,最后一层都应始终使用sigmoid(在二进制情况下)。
sigmoid函数用于估计示例在给定类别中的概率 ,示例的预测是该示例具有最高概率的类别。
最后,请更改此:
def predict(self, inputs_list):
inputs = np.array(inputs_list, ndmin=2).T
hidden = self.activation_function(np.dot(self.weights_ih, inputs) + self.bias_h)
outputs = self.activation_function(np.dot(self.weights_ho, hidden) + self.bias_o)
return outputs.flatten().tolist()
对此
def predict(self, inputs_list):
inputs = np.array(inputs_list, ndmin=2).T
hidden = self.activation_function(np.dot(self.weights_ih, inputs) + self.bias_h)
outputs = sigmoid(np.dot(self.weights_ho, hidden) + self.bias_o) // create a sigmoid function
return outputs.flatten().tolist()
并在培训中:
# Feedforward
hidden_inputs = np.dot(self.weights_ih, inputs) + self.bias_h
hidden = self.activation_function(hidden_inputs)
output_inputs = np.dot(self.weights_ho, hidden) + self.bias_o
outputs = self.activation_function(output_inputs)
至:
# Feedforward
hidden_inputs = np.dot(self.weights_ih, inputs) + self.bias_h
hidden = self.activation_function(hidden_inputs)
output_inputs = np.dot(self.weights_ho, hidden) + self.bias_o
outputs = sigmoid(output_inputs)
和
# Calculate gradients
output_gradient = output_errors * self.activation_function_d(output_inputs) * self.learning_rate
hidden_gradient = hidden_errors * self.activation_function_d(hidden_inputs) * self.learning_rate
至
# Calculate gradients
output_gradient = output_errors * sigmoid_d(output_inputs) * self.learning_rate
hidden_gradient = hidden_errors * self.activation_function_d(hidden_inputs) * self.learning_rate
声明:本站的技术帖子网页,遵循CC BY-SA 4.0协议,如果您需要转载,请注明本站网址或者原文地址。任何问题请咨询:yoyou2525@163.com.