[英]Neural Network for XOR approximation always outputs 0.5 for all inputs
I've tried to implement the following neural network for XOR gate approximation.我尝试为 XOR 门近似实现以下神经网络。 I've used the binary cross entropy for cost function.
我已经将二元交叉熵用于成本函数。 The cost always goes to around 0.69 and gets saturated.
成本总是在 0.69 左右并变得饱和。 And for all inputs the network outputs 0.5.
对于所有输入,网络输出 0.5。 I've tried varying number of epochs, learning rate, number of layers but got no change.
我尝试了不同的时期数、学习率、层数,但没有任何变化。 Where am I going wrong?
我哪里错了?
import numpy as np
import matplotlib.pyplot as plt
# Small constant guarding against log(0) and division-by-zero in the
# cross-entropy loss and its gradient.
epsilon = 1e-15
def sigmoid(x):
    """Elementwise logistic function 1 / (1 + e^-x)."""
    denom = 1 + np.exp(-x)
    return 1 / denom
def relu(x):
    """Elementwise rectified linear unit: negatives are clamped to 0."""
    return np.clip(x, 0, None)
def sigmoid_backwards(A):
    """Sigmoid derivative written in terms of its output: A * (1 - A)."""
    one_minus_A = 1 - A
    return A * one_minus_A
def relu_backwards(A):
    """ReLU derivative expressed in terms of the post-activation value A.

    Bug fix: the original returned ``A >= 0``, but A is a ReLU *output*
    (A = max(0, Z) >= 0 everywhere), so the mask was True for every unit and
    the gradient was never zeroed. The derivative is 1 only where the unit
    was active, i.e. A > 0.
    """
    return A > 0
def init_parameters(layer_dims, scale=0.001):
    """Initialize weights and biases for a fully connected network.

    layer_dims: sequence of layer sizes, e.g. (2, 3, 3, 1).
    scale: std-dev multiplier for the Gaussian weight init. Kept at the
        original 0.001 default for compatibility, but NOTE(review): such a
        tiny scale starves ReLU layers of signal and is a plausible cause of
        the cost saturating at ~0.69; a larger scale (e.g. He init) trains.

    Returns a dict mapping 'Wi'/'bi' to weight matrices of shape
    (layer_dims[i], layer_dims[i-1]) and zero bias columns (layer_dims[i], 1).
    """
    params = {}
    for i in range(1, len(layer_dims)):
        params['W' + str(i)] = np.random.randn(layer_dims[i], layer_dims[i-1]) * scale
        params['b' + str(i)] = np.zeros((layer_dims[i], 1))
    return params
def forward_pass(X, paramters, g):
    """Propagate X (features x samples) through every layer.

    g names the activation per layer (g[0] is a placeholder, never applied).
    Returns the list of activations, starting with the input X itself.
    """
    layer_vals = [X]
    A = X
    for layer in range(1, len(g)):
        W = paramters['W' + str(layer)]
        b = paramters['b' + str(layer)]
        Z = W @ A + b
        A = activations[g[layer]](Z)
        layer_vals.append(A)
    return layer_vals
def predict(X, paramters, g):
    """Return the network's output row for inputs X.

    Fix: the original duplicated the whole forward_pass loop verbatim;
    delegating removes the copy so the two can never drift apart.
    """
    return forward_pass(X, paramters, g)[-1][0]
def backward_pass(y_true, layer_vals, paramters, g, learning_rate=0.01):
    """One backprop + gradient-descent step for binary cross-entropy loss.

    y_true: labels, shape (1, m).
    layer_vals: activations from forward_pass (index 0 is the input X).
    paramters: parameter dict; NOTE: mutated in place AND returned.
    g: per-layer activation names; g[0] is unused.
    """
    m = y_true.shape[1]
    # dL/dA of binary cross-entropy; epsilon guards against division by zero.
    dA = -y_true/(layer_vals[-1] + epsilon) + (1-y_true)/(1-layer_vals[-1] + epsilon)
    for i in range(len(layer_vals)-1, 0, -1):
        # Activation derivatives are evaluated on the post-activation value A.
        dZ = dA * activations_backwards[g[i]](layer_vals[i])
        dA_prev = np.dot(paramters['W' + str(i)].T, dZ)
        dW = 1/m * np.dot(dZ, layer_vals[i-1].T)
        db = 1/m * np.sum(dZ, axis=1, keepdims=True)
        dA = dA_prev
        # In-place parameter update (callers may rely on mutation or return).
        paramters['W' + str(i)] -= learning_rate * dW
        paramters['b' + str(i)] -= learning_rate * db
    return paramters
def compute_cost(y, output):
    """Mean binary cross-entropy between labels y and predictions output."""
    m = y.shape[1]
    ce = y * np.log(output+epsilon) + (1-y) * np.log(1-output+epsilon)
    return -1/m * np.sum(ce)
# Activation-name -> forward function dispatch table.
activations = {
    'sigmoid': sigmoid,
    'relu': relu
}
# Activation-name -> derivative, expressed as a function of the
# post-activation output A (not the pre-activation Z).
activations_backwards = {
    'sigmoid': sigmoid_backwards,
    'relu': relu_backwards
}
# --- training script: XOR truth table, one sample per column ---
X = np.array([[0.000000, 0.000000, 1.000000, 1.000000],
              [0.000000, 1.000000, 0.000000, 1.000000]], dtype=float)
y = np.array([[0.0, 1.0, 1.0, 0.0]], dtype=float)
layer_dims = (2, 3, 3, 1)
#g = ['linear', 'sigmoid', 'sigmoid', 'sigmoid']
g = ['linear', 'relu', 'relu', 'sigmoid']  # g[0] is a placeholder, never applied
epochs = 1000
learning_rate = 0.01
paramters = init_parameters(layer_dims)
layer_vals = forward_pass(X, paramters, g)
costs = []
for i in range(epochs):
    # Fix: assign back to `paramters`. The original assigned to a *new* name
    # `parameters` (typo), so the training loop only worked because
    # backward_pass also mutates the dict in place.
    paramters = backward_pass(y, layer_vals, paramters, g, learning_rate=learning_rate)
    layer_vals = forward_pass(X, paramters, g)
    cost = compute_cost(y, layer_vals[-1])
    costs.append(cost)
    if (i+1) % 10 == 0:
        print(f"After {i+1} epochs at learning rate {learning_rate:.4f}, cost: ", cost)
plt.plot(costs)
plt.show()
print(predict(X, paramters, g))
Here is the curve for cost function value Cost Curve .这是成本函数值Cost Curve 的曲线。 And the output is always
[0.5 0.5 0.5 0.5]
并且输出总是
[0.5 0.5 0.5 0.5]
Nice try on neural nets;神经网络的不错尝试; you're very close!
你非常接近! Doing the following fixes your code:
执行以下操作可修复您的代码:
There is a typo in the line `parameters = backward_pass(...)` — it assigns the result to a new name `parameters` instead of `paramters`, which causes your parameters not to be updated.
`parameters = backward_pass()` 中有一个错字，导致您的参数不会被更新。
Here's the working code: 这是工作代码：
import numpy as np
import matplotlib.pyplot as plt
# Small constant guarding against log(0) and division-by-zero in the
# cross-entropy loss and its gradient.
epsilon = 1e-15
def sigmoid(x):
    """Logistic function, applied elementwise."""
    neg_exp = np.exp(-x)
    return 1. / (1. + neg_exp)
def relu(x):
    """Elementwise rectified linear unit."""
    return np.maximum(x, 0)
def sigmoid_backwards(A):
    """Derivative of the sigmoid in terms of its output A."""
    complement = 1 - A
    return A * complement
def relu_backwards(A):
    """ReLU derivative from the post-activation value A.

    Bug fix: ``A >= 0`` is True for every ReLU output (A = max(0, Z) >= 0),
    so dead units never had their gradient masked out. Use ``A > 0``.
    """
    return A > 0
def init_parameters(layer_dims, scale=1.):
    """Initialize parameters with Gaussian weights AND Gaussian biases.

    layer_dims: sequence of layer sizes.
    scale: multiplier for both weight and bias draws (default 1.0, matching
        the answer's working configuration — large enough to break symmetry
        through the ReLU layers).

    Returns dict of 'Wi' (layer_dims[i], layer_dims[i-1]) and
    'bi' (layer_dims[i], 1) arrays. Draw order (W then b, per layer) is kept
    so seeded runs reproduce the original.
    """
    params = {}
    for i in range(1, len(layer_dims)):
        params['W' + str(i)] = np.random.randn(layer_dims[i], layer_dims[i-1]) * scale
        params['b' + str(i)] = np.random.randn(layer_dims[i], 1) * scale
    return params
def forward_pass(X, paramters, g):
    """Run the network forward on X (features x samples).

    Returns all per-layer activations, with the raw input at index 0.
    g[0] is never applied; layers 1..len(g)-1 use activations[g[i]].
    """
    collected = [X]
    current = X
    num_layers = len(g)
    for idx in range(1, num_layers):
        pre_act = np.dot(paramters['W' + str(idx)], current) + paramters['b' + str(idx)]
        current = activations[g[idx]](pre_act)
        collected.append(current)
    return collected
def predict(X, paramters, g):
    """Return the network's output row for inputs X.

    Fix: the original duplicated the forward_pass loop line-for-line;
    delegating removes the duplication.
    """
    return forward_pass(X, paramters, g)[-1][0]
def backward_pass(y_true, layer_vals, paramters, g, learning_rate=0.01):
    """One backprop + gradient-descent step for binary cross-entropy loss.

    y_true: labels, shape (1, m).
    layer_vals: activations from forward_pass (index 0 is the input X).
    paramters: parameter dict; NOTE: mutated in place AND returned.
    g: per-layer activation names; g[0] is unused.
    """
    m = y_true.shape[1]
    # dL/dA of binary cross-entropy; epsilon guards against division by zero.
    dA = -y_true/(layer_vals[-1] + epsilon) + (1-y_true)/(1-layer_vals[-1] + epsilon)
    for i in range(len(layer_vals)-1, 0, -1):
        # Activation derivatives are evaluated on the post-activation value A.
        dZ = dA * activations_backwards[g[i]](layer_vals[i])
        dA_prev = np.dot(paramters['W' + str(i)].T, dZ)
        dW = 1/m * np.dot(dZ, layer_vals[i-1].T)
        db = 1/m * np.sum(dZ, axis=1, keepdims=True)
        dA = dA_prev
        # In-place update, so callers see changes even without the return value.
        paramters['W' + str(i)] -= learning_rate * dW
        paramters['b' + str(i)] -= learning_rate * db
    return paramters
def compute_cost(y, output):
    """Mean binary cross-entropy over the m samples in y (shape (1, m))."""
    m = y.shape[1]
    per_sample = y * np.log(output+epsilon) + (1-y) * np.log(1-output+epsilon)
    return -1/m * np.sum(per_sample)
# Activation-name -> forward function dispatch table.
activations = {
    'sigmoid': sigmoid,
    'relu': relu
}
# Activation-name -> derivative, expressed as a function of the
# post-activation output A (not the pre-activation Z).
activations_backwards = {
    'sigmoid': sigmoid_backwards,
    'relu': relu_backwards
}
# --- training script (answer's working config: larger init, wider layers,
# more epochs, smaller learning rate). XOR truth table, one sample/column.
X = np.array([[0.000000, 0.000000, 1.000000, 1.000000],
              [0.000000, 1.000000, 0.000000, 1.000000]], dtype=float)
y = np.array([[0.0, 1.0, 1.0, 0.0]], dtype=float)
layer_dims = (2, 32, 32, 1)
#g = ['linear', 'sigmoid', 'sigmoid', 'sigmoid']
g = ['linear', 'relu', 'relu', 'sigmoid']  # g[0] is a placeholder, never applied
epochs = 10000
learning_rate = 0.001
paramters = init_parameters(layer_dims)
layer_vals = forward_pass(X, paramters, g)
costs = []
for i in range(epochs):
    # backward_pass mutates paramters in place and returns the same dict.
    paramters = backward_pass(y, layer_vals, paramters, g, learning_rate=learning_rate)
    layer_vals = forward_pass(X, paramters, g)
    cost = compute_cost(y, layer_vals[-1])
    costs.append(cost)
    if (i+1) % 10 == 0:
        print(f"After {i+1} epochs at learning rate {learning_rate:.4f}, cost: ", cost)
plt.plot(costs)
plt.savefig("delete.png")
print(predict(X, paramters, g))
声明:本站的技术帖子网页,遵循CC BY-SA 4.0协议,如果您需要转载,请注明本站网址或者原文地址。任何问题请咨询:yoyou2525@163.com.