[英]Why does my neural network have extremely low weights after a few epochs?
我剛開始學習神經網絡,這是我的第一個。 問題是我擁有的數據越多,在 2-3 個 epoch 之后權重就越低,這是不尋常的,這導致我的 NN 什么也沒學到。
要重現此問題,請在 DataSet 類中找到 CreateData 函數,並將 nbofexample 更改為 20 之類的值:如果打印權重,您會看到它們都在正常範圍內(在 -1 和 1 之間均勻分佈)。但是如果您將 nbofexample 設置為 200 左右,那麼僅在 2 或 3 個 epoch 之後,最後一層的大部分權重就會非常接近 0,並且在訓練的其餘時間裡它們會停留在該區域。顯然,這會導致 NN 失敗。
順便說一句,我的 NN 基本上是在分析 0 到 9 之間的數字除以 10 的 arrays 作為歸一化,以檢查數組是否已排序。 在下面的代碼中我放了很多注釋,代碼可以很容易理解。
可能有一個簡單的解決方法,但我就是不明白:(
如果您想嘗試,這裡是完整的代碼(順便說一句,它是用 Python 寫的):
import numpy as np
import time
import random
import time
#This class is only used for creating the data if needed
#This class is only used for creating the data if needed
class DataSet():
    """Builds training examples: arrays of digits (normalized by /10),
    labelled 1 if the digit sequence is sorted in non-decreasing order."""

    @staticmethod
    def checkPossibility(A):
        """Return True if list A is already sorted (non-decreasing)."""
        return sorted(A) == A

    @staticmethod
    def CreateData(nbofchar=4, nbofexample=200):
        """Return (inputs, output) as 2-D numpy arrays.

        inputs : shape (nbofexample, nbofchar), each entry a digit / 10
        output : shape (nbofexample, 1), 1 if the row is sorted else 0

        Raises ValueError when nbofexample <= 0 (you need examples to train).
        Defaults preserve the original hard-coded settings.
        """
        maxexamples = pow(10, nbofchar)
        #handling dumbness
        if nbofexample > maxexamples:
            print("Too much data... resizing to max data")
            nbofexample = maxexamples
        elif nbofexample <= 0:
            raise ValueError("You need examples to train! (Error nbofexample==0)")
        #if more than half of all possible examples are requested, enumerate
        #everything and delete random rows until the requested size is reached
        if nbofexample > maxexamples / 2:
            inputs = []
            output = []
            for i in range(maxexamples):
                # zfill pads with leading zeros so every example has nbofchar digits
                new_ex = [int(a) for a in str(i).zfill(nbofchar)]
                inputs.append([d / 10 for d in new_ex])  # normalization /10 so values are in [0, 0.9]
                output.append([int(DataSet.checkPossibility(new_ex))])
            inputs = np.array(inputs)
            output = np.array(output)
            while len(inputs) > nbofexample:
                index = random.randint(0, len(inputs) - 1)
                # axis=0 removes the whole row; without it np.delete flattens the array
                inputs = np.delete(inputs, index, axis=0)
                output = np.delete(output, index, axis=0)
            return inputs, output
        #otherwise draw unique random examples until we have enough
        else:
            seen = set()
            inputs = []
            output = []
            while len(inputs) < nbofexample:
                new_ex = tuple(random.randint(0, 9) for _ in range(nbofchar))
                if new_ex in seen:  # proper duplicate check (the set holds every row drawn so far)
                    continue
                seen.add(new_ex)
                inputs.append([d / 10 for d in new_ex])  # normalization /10 so values are in [0, 0.9]
                output.append([int(DataSet.checkPossibility(list(new_ex)))])
            return np.array(inputs), np.array(output)
#assigning weights to each layer
class NeuLayer():
    """One fully-connected layer holding a weight matrix of shape
    (inputsperneuron, nbofneuron), initialized uniformly in [-1, 1)."""
    def __init__(self, nbofneuron, inputsperneuron):
        # Draw uniform samples in [0, 1) and rescale them to [-1, 1).
        uniform01 = np.random.random((inputsperneuron, nbofneuron))
        self.weight = uniform01 * 2 - 1
#the actual neural network
class NeuNet():
    """A simple fully-connected feed-forward network with sigmoid
    activations, trained by full-batch backpropagation."""

    def __init__(self, layers):
        # layers: list of NeuLayer-like objects, each exposing a .weight matrix
        self.layers = layers

    def _sigmoid(self, x):
        """Logistic activation 1 / (1 + e^(-x/k)); k=1 is a temperature knob."""
        k = 1
        return 1 / (1 + np.exp(-x / k))

    def _sigmoid_derivative(self, x):
        # NOTE: x must already be a sigmoid OUTPUT — this is s*(1-s),
        # the derivative of the sigmoid expressed in terms of its output.
        return x * (1 - x)

    def train(self, training_set_inputs, training_set_outputs, nboftime, learning_rate=1):
        """Run `nboftime` full-batch gradient steps.

        learning_rate scales every weight update; the default of 1 reproduces
        the original (rather aggressive) full-gradient step. Smaller values
        help avoid the weight collapse reported with larger data sets.
        """
        #debug: throttles how often the progress line is printed
        timer1 = 0
        if len(self.layers) < 2:
            return
        for iteration in range(nboftime):
            delta = [0] * len(self.layers)
            error = [0] * len(self.layers)
            outputlayers = self.think(training_set_inputs)
            #backward pass: deltas for each layer "i", output layer first
            for i in range(len(self.layers) - 1, -1, -1):
                if i == len(self.layers) - 1:
                    error[i] = training_set_outputs - outputlayers[i]
                else:
                    # propagate the next layer's delta back through its weights
                    error[i] = np.dot(delta[i + 1], self.layers[i + 1].weight.T)
                delta[i] = error[i] * self._sigmoid_derivative(outputlayers[i])
            #update weights for each layer "i"
            for i in range(len(self.layers)):
                if i == 0:
                    self.layers[0].weight += learning_rate * np.dot(training_set_inputs.T, delta[0])
                else:
                    self.layers[i].weight += learning_rate * np.dot(outputlayers[i - 1].T, delta[i])
            #display progression and a test prediction (controlled by the
            #module-level Display_progression/delay globals; guarded with
            #globals().get so the class also works when they are undefined)
            if globals().get("Display_progression"):
                if timer1 < time.time():
                    timer1 = time.time() + delay
                    value = ((iteration + 1) / nboftime) * 100
                    test_input = np.array([.1, .2, .1, .1])
                    print('%.2f' % value + "% test_input = " + str(test_input) + " test_output = " + str(self.think(test_input)[-1]))

    def think(self, input):
        """Forward pass: return a list with every layer's activation for `input`."""
        outforlayers = [None] * len(self.layers)
        outforlayer = input
        for i in range(len(self.layers)):
            outforlayer = self._sigmoid(np.dot(outforlayer, self.layers[i].weight))
            outforlayers[i] = outforlayer
        return outforlayers
#datamaker
creating_data = True
train = True
if creating_data:
    #creates files with inputs and their expected output
    print("Start creating data...")
    # renamed from `input`/`output` to avoid shadowing the builtin input()
    inp_data, out_data = DataSet.CreateData()
    print("Data created!")
    # `with` guarantees the files are actually closed — the original
    # `file.close;` only referenced the method and never called it
    with open("data_input", "wb") as f:
        np.save(f, inp_data)
    with open("data_output", "wb") as f:
        np.save(f, out_data)
if train:
    default_data_set = False
    if default_data_set:
        #default training set (hard-coded 5-input examples and labels)
        inp_training = np.array([[0, 0, 0, 0, 0], [0.1, 0, 0, 0, 0], [0, 0.1, 0, 0, 0], [0.1, 0.1, 0, 0, 0], [0, 0, 0.1, 0, 0], [0.1, 0, 0.1, 0, 0], [0, 0.1, 0.1, 0, 0], [0.1, 0.1, 0.1, 0, 0],
                                 [0, 0, 0, 0.1, 0], [0.1, 0, 0, 0.1, 0], [0, 0.1, 0, 0.1, 0], [0.1, 0.1, 0, 0.1, 0], [0, 0, 0.1, 0.1, 0], [0.1, 0, 0.1, 0.1, 0], [0, 0.1, 0.1, 0.1, 0], [0.1, 0.1, 0.1, 0.1, 0],
                                 [0, 0, 0, 0, 0.1], [0.1, 0, 0, 0, 0.1], [0, 0.1, 0, 0, 0.1], [0.1, 0.1, 0, 0, 0.1], [0, 0, 0.1, 0, 0.1], [0.1, 0, 0.1, 0, 0.1], [0, 0.1, 0.1, 0, 0.1], [0.1, 0.1, 0.1, 0, 0.1],
                                 [0, 0, 0, 0.1, 0.1], [0.1, 0, 0, 0.1, 0.1], [0, 0.1, 0, 0.1, 0.1], [0.1, 0.1, 0, 0.1, 0.1], [0, 0, 0.1, 0.1, 0.1], [0.1, 0, 0.1, 0.1, 0.1], [0, 0.1, 0.1, 0.1, 0.1], [0.1, 0.1, 0.1, 0.1, 0.1]])
        out_training = np.array([[0,0,0,0,0,0,0,1,
                                  0,0,0,1,0,1,1,1,
                                  0,0,0,1,0,1,1,1,
                                  0,1,1,1,1,1,1,1]]).T
    else:
        print("Loading data files...")
        # `with` actually closes the files — the original `file.close;`
        # was a no-op (method referenced, never called)
        with open("data_input", "rb") as f:
            inp_training = np.load(f)
        with open("data_output", "rb") as f:
            out_training = np.load(f)
        print("Done reading from data files!")
    #debug
    Display_progression = True
    delay = 1  # seconds between progress prints
    #initialize: 4 hidden layers of 10 neurons, sized from the data
    np.random.seed(5)
    netlayer_input = NeuLayer(10, len(inp_training[0]))
    netlayer2 = NeuLayer(10, 10)
    netlayer3 = NeuLayer(10, 10)
    netlayer4 = NeuLayer(10, 10)
    netlayer_out = NeuLayer(len(out_training[0]), 10)
    All_layers = [netlayer_input, netlayer2, netlayer3, netlayer4, netlayer_out]
    brain = NeuNet(All_layers)
    #train
    print("Start training...")
    brain.train(inp_training, out_training, 100000)
    print("Done!")
    #final test on one hand-picked (sorted) example
    outputfinal = brain.think(np.array([0, .1, .3, .7]))
    #output
    a = outputfinal[-1]  # [-1] so we get the last layer's output(s)
    print(a)
注意這是我第一次在 stackoverflow 上提問,所以請告訴我是否遺漏了這個問題的關鍵信息。
神經網絡可能會遇到所謂的梯度消失問題,這是由更經典的激活(如 Sigmoid 或 Tanh)引起的。
用外行的話來說,像 Sigmoid 和 Tanh 這樣的激活函數確實會擠壓輸入,對嗎?例如,sigmoid(10) 和 sigmoid(100) 分別是 0.9999 和 1。儘管輸入變化很大,但輸出幾乎沒有變化——函數在這一段上實際上是恆定的。在函數幾乎恆定的區域,其導數趨於零(或非常小的值)。這些非常小的導數/梯度彼此相乘後實際上變為零,從而阻止您的模型學習任何東西——您的權重卡住並停止更新。
我建議你在自己的時間做一些關於這個主題的進一步閱讀。 在幾種解決方案中,解決此問題的一種方法是使用不同的激活,例如ReLU 。
聲明:本站的技術帖子網頁,遵循CC BY-SA 4.0協議,如果您需要轉載,請注明本站網址或者原文地址。任何問題請咨詢:yoyou2525@163.com.