[英]implement Neural Network in python
我正在嘗試使用numpy在python中實現神經網絡(NN),但發現我的NN不能按預期工作。
我已經計算了數值梯度,並將其與通過反向傳播計算出的梯度進行比較,結果兩者一致。 但是成本(cost)下降得非常緩慢,並且在若干個訓練週期(epoch)之後反而回升。
我正在嘗試解決“異或”(XOR)問題。 但是我的神經網絡似乎忽略了每個樣本的輸入向量,傾向於把所有樣本都預測為標簽為1的樣本所佔的比例(例如,如果我給它3個正樣本和1個負樣本,它對全部4個樣本的預測值都約為0.75)。
誰能幫助我解決這個問題? 這已經困擾了我很長時間了。
這是神經網絡的結構和一些公式
這是我的代碼
import numpy as np
import matplotlib.pyplot as plt
# Seed NumPy's global RNG so the np.random.random() draws below are reproducible.
np.random.seed(565113221)
def sigmoid(x):  # sigmoid function
    """Return the logistic sigmoid ``1 / (1 + e**-x)`` of *x*, element-wise.

    The result is built from ``exp(-|x|)``, which never exceeds 1, so inputs
    of large magnitude cannot overflow.  (The naive ``e ** -x`` form
    overflows to ``inf`` and emits a RuntimeWarning for very negative *x*.)

    Args:
        x: A real scalar or array-like.

    Returns:
        Sigmoid values with the same shape as *x*; a NumPy scalar when *x*
        is a scalar.
    """
    x = np.asarray(x)
    decay = np.exp(-np.abs(x))  # always in (0, 1]
    # For x >= 0: 1 / (1 + e**-x).  For x < 0 the algebraically identical
    # e**x / (1 + e**x) is used so only the bounded `decay` term appears.
    result = np.where(x >= 0, 1.0 / (1.0 + decay), decay / (1.0 + decay))
    # Indexing with () unwraps a 0-d array to a scalar and leaves n-d arrays as is.
    return result[()]
def forward(x, W1, W2, b1, b2):  # feed forward
    """Run one forward pass through the two-layer sigmoid network.

    Returns:
        A tuple ``(a, z, b, y)`` where ``a`` is the product ``W1 . x``,
        ``z`` is ``sigmoid(a + b1)``, ``b`` is the product ``W2 . z`` and
        ``y`` is ``sigmoid(b + b2)``.
    """
    hidden_product = np.dot(W1, x)
    hidden_activation = sigmoid(hidden_product + b1)
    output_product = np.dot(W2, hidden_activation)
    output_activation = sigmoid(output_product + b2)
    return hidden_product, hidden_activation, output_product, output_activation
def pred(X, W1, W2, b1, b2):  # predict
    """Return the network output for every row of *X*.

    Args:
        X: 2-D array with one sample per row.
        W1, W2, b1, b2: Network parameters, passed unchanged to ``forward``.

    Returns:
        Array of shape ``(X.shape[0], 1)`` holding the output activation
        computed by ``forward`` for each sample.
    """
    y_pred = np.zeros((X.shape[0], 1))
    for i, sample in enumerate(X):
        # Feed row i of X as a column vector.  Reading a module-level `x`
        # here instead would yield the same prediction for every row.
        _, _, _, output = forward(sample.reshape((-1, 1)), W1, W2, b1, b2)
        y_pred[i] = np.ravel(output)
    return y_pred
# Train the 2-2-1 sigmoid network on XOR with per-sample (stochastic)
# gradient descent on the cross-entropy cost.
X = np.array([[0, 0], [0, 1], [1, 0], [1, 1]])  # features 4 * 2
Y = np.array([[0], [1], [1], [0]])  # labels 4 * 1

epsilon = 0.12  # initialize all weights between -0.12 ~ 0.12
W1 = np.random.random((2, 2)) * epsilon * 2 - epsilon  # map from input to hidden
b1 = np.random.random((2, 1)) * epsilon * 2 - epsilon  # bias1
W2 = np.random.random((1, 2)) * epsilon * 2 - epsilon  # map from hidden to output
b2 = np.random.random((1, 1)) * epsilon * 2 - epsilon  # bias2

# NOTE(review): 50 epochs at alpha = 0.01 looks like very little training for
# XOR; the values are kept as they were -- confirm whether they should be raised.
epoch = 50  # maximum training turns
alpha = 0.01  # learning rate

for turn in range(epoch):
    print('turn:', turn, end=' ')
    epoch_cost = 0.0
    for index in range(X.shape[0]):
        x = X[index, :].reshape((-1, 1))
        y = Y[index, :].reshape((-1, 1))
        a, z, b, y_pred = forward(x, W1, W2, b1, b2)  # feed forward

        # Cross-entropy cost of this sample, accumulated as a plain float.
        cost = -np.sum(y * np.log(y_pred) + (1 - y) * np.log(1 - y_pred))
        epoch_cost += float(cost)

        # Back-propagation.  Both error terms are computed BEFORE any
        # parameter is changed, so the hidden-layer gradient uses the same
        # W2 that produced this forward pass.
        delta_out = y_pred - y  # dCost/d(b + b2) for sigmoid + cross-entropy
        delta_hidden = W2.T.dot(delta_out) * z * (1 - z)  # dCost/d(a + b1)

        # Whole-array updates; these also avoid assigning (1, 1) arrays into
        # single elements, which recent NumPy versions deprecate.
        W2 -= alpha * delta_out.dot(z.T)
        b2 -= alpha * delta_out
        W1 -= alpha * delta_hidden.dot(x.T)
        b1 -= alpha * delta_hidden
    print('cost:', epoch_cost)

print('prediction\n', pred(X, W1, W2, b1, b2))
print('ground-truth\n', Y)
這不是一個完整的答案。 我只是用一種類似於簡單進化策略(ES)的方法代替了梯度下降。 這樣做行得通,因此您的前向傳播(forward pass)中可能沒有錯誤。
# [...] sigmoid(), forward(), pred() not modified
# Random-search training: evaluate the current candidate parameters on all
# samples, keep them if they give the lowest cost seen so far, then draw the
# next candidate by perturbing the best parameters.
X = np.array([[0, 0], [0, 1], [1, 0], [1, 1]])  # features 4 * 2
Y = np.array([[0], [1], [1], [0]])  # labels 4 * 1

# The first candidate is the all-zero network.
W1 = np.zeros((2, 2))  # map from input to hidden
b1 = np.zeros((2, 1))  # bias1
W2 = np.zeros((1, 2))  # map from hidden to output
b2 = np.zeros((1, 1))  # bias2

epoch = 2000  # maximum training turns
epsilon = 0.12  # perturb all weights between -0.12 ~ 0.12

for turn in range(epoch):
    print('turn:', turn, end=' ')

    # Cumulative cross-entropy cost of the current candidate.
    epoch_cost = 0
    for features, label in zip(X, Y):
        x = features.reshape((-1, 1))
        y = label.reshape((-1, 1))
        a, z, b, y_pred = forward(x, W1, W2, b1, b2)  # feed forward
        positive_term = y.dot(np.log(y_pred))
        negative_term = (1 - y).dot(np.log(1 - y_pred))
        cost = -positive_term - negative_term
        epoch_cost += cost

    # Keep the candidate when it is the first one or beats the best so far.
    if turn == 0 or epoch_cost < epoch_cost_best:
        epoch_cost_best = epoch_cost
        W1_best, b1_best, W2_best, b2_best = W1, b1, W2, b2

    # Next candidate: best parameters plus uniform noise.  The draw order
    # (W1, b1, W2, b2) determines which random numbers each array receives.
    W1 = W1_best + np.random.random(W1_best.shape) * epsilon * 2 - epsilon
    b1 = b1_best + np.random.random(b1_best.shape) * epsilon * 2 - epsilon
    W2 = W2_best + np.random.random(W2_best.shape) * epsilon * 2 - epsilon
    b2 = b2_best + np.random.random(b2_best.shape) * epsilon * 2 - epsilon

    print('cost:', epoch_cost)

print('prediction\n', pred(X, W1_best, W2_best, b1_best, b2_best))
print('ground-truth\n', Y)
聲明:本站的技術帖子網頁,遵循CC BY-SA 4.0協議,如果您需要轉載,請注明本站網址或者原文地址。任何問題請咨詢:yoyou2525@163.com.