[英]My neural network only predicts one thing
我是深度神經網絡的新手,並試圖用 Python 從頭開始實現它。我嘗試了很多次,但在我的實現中找不到錯誤。每當我使用我的 predict 函數時,它總是輸出 0,而其他部分似乎都運作正常。我之前也清理過數據。
import os
# Change the working directory to the folder holding the data set, so the
# relative 'clean_data.csv' path read later in this script resolves.
os.chdir(r'path where my data is store')  # NOTE(review): looks like a placeholder path - replace with the real folder
創建 dataframe 並將值分配給輸入和目標向量
import pandas as pd
import numpy as np
# Load the cleaned data set from the current working directory.
df = pd.read_csv('clean_data.csv')

# Feature matrix: one row per sample, one column per selected feature
# (30 columns: the *_mean, *_se and *_worst measurements).
X = df[[
    'radius_mean', 'texture_mean', 'perimeter_mean',
    'area_mean', 'smoothness_mean', 'compactness_mean', 'concavity_mean',
    'concave points_mean', 'symmetry_mean', 'fractal_dimension_mean',
    'radius_se', 'texture_se', 'perimeter_se', 'area_se', 'smoothness_se',
    'compactness_se', 'concavity_se', 'concave points_se', 'symmetry_se',
    'fractal_dimension_se', 'radius_worst', 'texture_worst',
    'perimeter_worst', 'area_worst', 'smoothness_worst',
    'compactness_worst', 'concavity_worst', 'concave points_worst',
    'symmetry_worst', 'fractal_dimension_worst',
]].values

# Target as a column vector. -1 lets NumPy infer the number of rows from the
# data instead of hard-coding 569, so the script keeps working when the CSV
# holds a different number of samples.
Y = df['diagnosis'].values
Y = Y.reshape(-1, 1)
在訓練和測試數據中拆分數據(x 和 y 是訓練集,xt 和 yt 是測試集)
from sklearn.model_selection import train_test_split
# Hold out 20% of the samples as the test set (x/y = train, xt/yt = test);
# the fixed random_state makes the split repeatable between runs.
x, xt, y, yt = train_test_split(X, Y, test_size = 0.2, random_state = 40)
# Transpose so that samples run along axis 1. The network code in this file
# computes np.dot(W, A) with W shaped (units, inputs) and divides the cost by
# y.shape[1], so it needs inputs laid out as (features, samples).
x, xt, y, yt = x.T, xt.T, y.T, yt.T
初始化參數
def iniparams(layer_dims):
    """Create the initial weights and biases for every layer.

    Args:
        layer_dims: sequence of layer sizes, input layer first.

    Returns:
        dict with keys 'W1', 'b1', 'W2', 'b2', ... where 'Wl' has shape
        (layer_dims[l], layer_dims[l - 1]) and holds small random values
        (standard normal scaled by 0.01), and 'bl' is a zero column vector
        of shape (layer_dims[l], 1).
    """
    params = {}
    size_pairs = zip(layer_dims[:-1], layer_dims[1:])
    for idx, (n_in, n_out) in enumerate(size_pairs, start=1):
        params['W{}'.format(idx)] = np.random.randn(n_out, n_in) * 0.01
        params['b{}'.format(idx)] = np.zeros((n_out, 1))
    return params
編寫輔助函數 #1
def sigmoid(Z):
    """Apply the logistic function element-wise.

    Args:
        Z: pre-activation values.

    Returns:
        (A, Z): the activations 1 / (1 + exp(-Z)) and the untouched input,
        which callers keep as the cache for the backward pass.
    """
    denominator = 1 + np.exp(-Z)
    activations = 1 / denominator
    return activations, Z
#2
def relu(Z):
    """Apply the rectified linear unit element-wise.

    Args:
        Z: pre-activation values.

    Returns:
        (A, Z): max(0, Z) per element and the untouched input, which callers
        keep as the cache for the backward pass.
    """
    rectified = np.maximum(0, Z)
    return rectified, Z
線性前向傳播
def linearfwd(W, A, b):
    """Compute the linear part of a layer, Z = W . A + b.

    Args:
        W: weight matrix of the layer.
        A: activations fed into the layer.
        b: bias, added to the matrix product (broadcast by NumPy).

    Returns:
        (Z, linear_cache) where linear_cache is the tuple (W, A, b) saved
        for the backward pass.
    """
    weighted_sum = np.dot(W, A)
    pre_activation = weighted_sum + b
    return pre_activation, (W, A, b)
前向激活
def fwdactivation(W, A_prev, b, activation):
    """Run one layer forward: linear step followed by the chosen activation.

    Args:
        W: weight matrix of the layer.
        A_prev: activations coming from the previous layer (or the input).
        b: bias of the layer.
        activation: 'sigmoid' or 'relu'.

    Returns:
        (A, cache) where A is the layer output and cache is the pair
        (linear_cache, activation_cache) returned by linearfwd and by the
        activation helper, kept for the backward pass.

    Raises:
        ValueError: if activation is neither 'sigmoid' nor 'relu'.
    """
    # Reject unknown names up front so a typo fails here with a clear
    # message rather than as an unbound-variable error further down.
    if activation not in ('sigmoid', 'relu'):
        raise ValueError(
            "activation must be 'sigmoid' or 'relu', got %r" % (activation,)
        )

    # The linear step is identical for every activation, so do it once.
    Z, linear_cache = linearfwd(W, A_prev, b)
    if activation == 'sigmoid':
        A, activation_cache = sigmoid(Z)
    else:
        A, activation_cache = relu(Z)
    return A, (linear_cache, activation_cache)
前向傳播模型
def fwdmodel(x, params):
    """Forward pass through the whole network.

    Every layer except the last uses 'relu'; the last layer uses 'sigmoid'.

    Args:
        x: network input.
        params: dict holding 'W1', 'b1', ..., 'WL', 'bL'; the number of
            layers is taken as len(params) // 2.

    Returns:
        (AL, caches): the output of the final layer and a list with one
        cache per layer, in layer order, as returned by fwdactivation.
    """
    n_layers = len(params) // 2
    caches = []
    activation = x

    # Hidden layers: 1 .. n_layers - 1.
    for layer in range(1, n_layers):
        weights = params['W' + str(layer)]
        bias = params['b' + str(layer)]
        activation, layer_cache = fwdactivation(weights, activation, bias, 'relu')
        caches.append(layer_cache)

    # Output layer.
    out_weights = params['W' + str(n_layers)]
    out_bias = params['b' + str(n_layers)]
    AL, out_cache = fwdactivation(out_weights, activation, out_bias, 'sigmoid')
    caches.append(out_cache)
    return AL, caches
計算成本
def J(AL, y):
    """Binary cross-entropy cost averaged over the samples.

    Args:
        AL: predicted probabilities from the output layer.
        y: targets with the same shape as AL (expected to be 0/1 labels);
            the sum is divided by y.shape[1].

    Returns:
        The scalar mean cost.
    """
    # A saturated sigmoid can return exactly 0.0 or 1.0; np.log(0) is -inf
    # and 0 * -inf is nan, which would poison the reported cost. Keep the
    # probabilities a hair inside (0, 1) before taking logs.
    eps = 1e-15
    clipped = np.clip(AL, eps, 1 - eps)
    n_samples = y.shape[1]
    log_likelihood = (
        np.multiply(np.log(clipped), y)
        + np.multiply(np.log(1 - clipped), (1 - y))
    )
    return -np.sum(log_likelihood) / n_samples
反向 sigmoid(sigmoid 的反向傳播)
def sigmoidbkwd(dA, cache):
    """Backward step through a sigmoid activation.

    Args:
        dA: gradient of the cost with respect to the activation output.
        cache: the pre-activation Z saved during the forward pass.

    Returns:
        dZ = dA * s * (1 - s), where s is the sigmoid of Z.
    """
    sig = 1 / (1 + np.exp(-cache))
    return dA * sig * (1 - sig)
反向 ReLU(ReLU 的反向傳播)
def relubkwd(dA, cache):
    """Backward step through a ReLU activation.

    This is the helper bkwdactivation calls for 'relu' layers. The gradient
    passes through unchanged where the pre-activation was positive and is
    zero everywhere else.

    Args:
        dA: gradient of the cost with respect to the activation output.
        cache: the pre-activation Z saved during the forward pass.

    Returns:
        dZ, an array shaped like dA.
    """
    Z = cache
    # ReLU's derivative is 1 for Z > 0 and 0 otherwise (0 is used at Z == 0).
    return np.where(Z > 0, dA, 0.0)
線性 bkwd
def linearbkwd(dZ, cache):
    """Backward step through the linear part of a layer.

    Args:
        dZ: gradient of the cost with respect to the layer's pre-activation.
        cache: the (W, A_prev, b) tuple saved by the forward linear step.

    Returns:
        (dW, dA_prev, db): gradients for the weights, for the previous
        layer's activations, and for the bias. dW and db are averaged over
        the samples, counted as A_prev.shape[1].
    """
    W, A_prev, _ = cache
    n_samples = A_prev.shape[1]
    grad_W = np.dot(dZ, A_prev.T) / n_samples
    grad_b = np.sum(dZ, axis=1, keepdims=True) / n_samples
    grad_A_prev = np.dot(W.T, dZ)
    return grad_W, grad_A_prev, grad_b
后向激活
def bkwdactivation(dA, cache, activation):
    """Run one layer backward: activation gradient, then linear gradient.

    Args:
        dA: gradient of the cost with respect to this layer's output.
        cache: the (linear_cache, activation_cache) pair stored for this
            layer during the forward pass.
        activation: 'sigmoid' or 'relu', matching the forward pass.

    Returns:
        (dW, dA_prev, db) exactly as returned by linearbkwd.

    Raises:
        ValueError: if activation is neither 'sigmoid' nor 'relu'.
    """
    # Reject unknown names up front so a typo fails here with a clear
    # message rather than as an unbound-variable error at the return.
    if activation not in ('sigmoid', 'relu'):
        raise ValueError(
            "activation must be 'sigmoid' or 'relu', got %r" % (activation,)
        )

    linear_cache, activation_cache = cache
    if activation == 'sigmoid':
        dZ = sigmoidbkwd(dA, activation_cache)
    else:
        dZ = relubkwd(dA, activation_cache)
    # The linear backward step is the same for both activations.
    return linearbkwd(dZ, linear_cache)
向后 model
def bkwdmodel(AL, y, cache):
    """Backward pass through the whole network.

    Args:
        AL: output of the final (sigmoid) layer from the forward pass.
        y: targets, same shape as AL.
        cache: list of per-layer caches from the forward pass, in layer
            order; its length is taken as the number of layers.

    Returns:
        dict of gradients keyed 'dW<l>', 'db<l>' for each layer l and
        'dA<l>' for the activations feeding layer l + 1.
    """
    grads = {}
    n_layers = len(cache)

    # Derivative of the cross-entropy cost with respect to AL.
    dAL = -(np.divide(y, AL) - np.divide(1 - y, 1 - AL))

    # Output layer uses the sigmoid activation.
    dW_out, dA_below, db_out = bkwdactivation(dAL, cache[n_layers - 1], 'sigmoid')
    grads['dW' + str(n_layers)] = dW_out
    grads['dA' + str(n_layers - 1)] = dA_below
    grads['db' + str(n_layers)] = db_out

    # Hidden layers, from the last hidden layer down to layer 1.
    for layer in range(n_layers - 1, 0, -1):
        upstream = grads['dA' + str(layer)]
        dW_layer, dA_prev, db_layer = bkwdactivation(upstream, cache[layer - 1], 'relu')
        grads['dW' + str(layer)] = dW_layer
        grads['dA' + str(layer - 1)] = dA_prev
        grads['db' + str(layer)] = db_layer
    return grads
使用梯度下降優化參數
def optimize(grads, params, alpha):
    """Apply one gradient-descent update to every weight and bias.

    Args:
        grads: dict with 'dW<l>' and 'db<l>' for each layer.
        params: dict with 'W<l>' and 'b<l>' for each layer; its entries are
            rebound to the updated arrays.
        alpha: learning rate.

    Returns:
        The same params dict, after the update.
    """
    n_layers = len(params) // 2
    for layer in range(1, n_layers + 1):
        for prefix in ('W', 'b'):
            key = prefix + str(layer)
            # Rebind rather than update in place, so the previous arrays
            # are left untouched.
            params[key] = params[key] - alpha * grads['d' + key]
    return params
神經網絡 Model
def model(x, y, layer_dims, iters, alpha=1.2):
    """Train the network with full-batch gradient descent.

    Args:
        x: training inputs, passed to fwdmodel.
        y: training targets, passed to J and bkwdmodel.
        layer_dims: layer sizes, passed to iniparams.
        iters: number of gradient-descent iterations to run.
        alpha: learning rate handed to optimize. Defaults to 1.2, the value
            that used to be hard-coded here.

    Returns:
        (costs, params): the cost of each iteration, measured before that
        iteration's update (exactly one entry per iteration), and the
        trained parameters.
    """
    costs = []
    params = iniparams(layer_dims)
    # 1-based and inclusive so that exactly `iters` updates are performed
    # and the progress message lands on round numbers.
    for i in range(1, iters + 1):
        AL, caches = fwdmodel(x, params)
        cost = J(AL, y)
        # Record each cost once; a second append on every 100th iteration
        # would misalign the cost-vs-iteration curve.
        costs.append(cost)
        grads = bkwdmodel(AL, y, caches)
        params = optimize(grads, params, alpha)
        if i % 100 == 0:
            print('Cost after', i,'iterations is:', cost)
    return costs, params
訓練模型(成本確實在下降,見 Cost vs Iterations 曲線,Y 軸為成本、X 軸為迭代次數)
# Train on the training split: 30 input features, five hidden layers with
# 8, 5, 4, 4 and 3 units, and a single output unit; 3000 is passed as iters.
# NOTE(review): the author later reports that this network was too deep for
# the problem - consider a shallower layer_dims.
costs, params = model(x, y, [30,8,5,4,4,3,1], 3000)
預測 function
def predict(x,params):
    """Classify inputs with the trained network.

    Args:
        x: inputs, passed to fwdmodel.
        params: trained parameters, passed to fwdmodel.

    Returns:
        Boolean array shaped like the network output: True where the output
        is at least 0.5, False otherwise.
    """
    probabilities, _ = fwdmodel(x, params)
    return probabilities >= 0.5
最后當我這樣做時
# Classify the held-out test set with the trained parameters.
predictions = predict(xt,params)
# Bare expression - presumably run in a notebook/REPL cell so the array is
# echoed; it has no effect when run as a plain script.
predictions
我明白了:
數組([[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]])
請告訴我我錯在哪里
請幫幫我:D
我不明白您為什么要轉置 train_test_split 的輸出。為什么要使用 x.T、xt.T 呢?您應該嘗試打印 params 和 xt 這兩個數組,看看它們的內容。它們相似嗎?您的參數是否給出正確的結果?請逐一檢查。
我的問題是我的神經網絡太深了。 這是像我這樣的新手容易犯的錯誤。 我發現這個很棒的資源幫助我意識到了這個錯誤: http://theorangeduck.com/page/neural-network-not-working
聲明:本站的技術帖子網頁,遵循CC BY-SA 4.0協議,如果您需要轉載,請注明本站網址或者原文地址。任何問題請咨詢:yoyou2525@163.com.