[英]How to fix the problem of not learning in CNN with Tensorflow?
我正在使用 Tensorflow 創建與 keras 相同的網絡。 問題是結構是相同的,但是神經網絡無法學習並陷入其可學習的過程。
我已經嘗試了一切,但我無法讓神經網絡學習。
#We initialize the input data with placeholders
tf_data = tf.placeholder(tf.float32, shape=(None, HEIGTH, WIDTH, CHANNELS))
tf_labels = tf.placeholder(tf.float32, shape=(None, LABELS))
FILTER1 = (4,4)
STRIDE1 = (2,1)
FILTER2 = (2,1)
STRIDE2 = (1,1)
DEPTH = 32 #32 # Convolutional Kernel depth size == Number of Convolutional Kernels
HIDDEN1 = 128 #1024 # Number of hidden neurons in the fully connected layer
HIDDEN2 = 256
HIDDEN3 = 512
keep_prob1 = 0.4
keep_prob2 = 0.25
keep_prob3 = 0.5
#CNN
w1 = tf.Variable(tf.truncated_normal([FILTER1[0], FILTER1[1], CHANNELS, DEPTH], stddev=0.001))
b1 = tf.Variable(tf.zeros([DEPTH]))
#output 100 , 9 , 32
w2 = tf.Variable(tf.truncated_normal([FILTER2[0], FILTER2[1], DEPTH, 2*DEPTH], stddev=0.001))
b2 = tf.Variable(tf.constant(0.0, shape=[2*DEPTH]))
#output
#FC
w3 = tf.Variable(tf.truncated_normal([2496, HIDDEN1], stddev=0.001))
b3 = tf.Variable(tf.constant(0.0, shape=[HIDDEN1]))
w4 = tf.Variable(tf.truncated_normal([HIDDEN1, HIDDEN2], stddev=0.001))
b4 = tf.Variable(tf.constant(0.0, shape=[HIDDEN2]))
w5 = tf.Variable(tf.truncated_normal([HIDDEN2, HIDDEN3], stddev=0.001))
b5 = tf.Variable(tf.constant(0.0, shape=[HIDDEN3]))
w6 = tf.Variable(tf.truncated_normal([HIDDEN3, LABELS], stddev=0.001))
b6 = tf.Variable(tf.constant(0.0, shape=[LABELS]))
training = tf.placeholder(tf.bool)
def logits(data):
# Convolutional layer 1
x = tf.nn.conv2d(data, w1, [1, STRIDE1[0], STRIDE1[1], 1], padding='SAME')
x = tf.nn.lrn(x,4,bias=0.0, alpha=0.001/9.0, beta=0.75)
x = tf.nn.max_pool(x, [1, 2, 2, 1], [1, 2, 2, 1], padding='SAME')
x = tf.nn.relu(x + b1)
# Convolutional layer 2
x = tf.nn.conv2d(x, w2, [1, STRIDE2[0], STRIDE2[1], 1], padding='SAME')
x = tf.nn.lrn(x,4,bias=0.0, alpha=0.001/9.0, beta=0.75)
x = tf.nn.max_pool(x, [1, 2, 2, 1], [1, 2, 2, 1], padding='SAME')
x = tf.nn.relu(x + b2)
# Fully connected layer
layer_shape = x.get_shape()
num_features = layer_shape[1:4].num_elements()
print(num_features)
x = tf.reshape(x, (-1, num_features))
layer_1 = tf.nn.relu(tf.matmul(x, w3) + b3)
drop_out = tf.nn.dropout(layer_1, keep_prob1) # DROP-OUT here
layer_2 = tf.nn.relu(tf.matmul(drop_out, w4) + b4)
drop_out = tf.nn.dropout(layer_2, keep_prob2) # DROP-OUT here
layer_3 = tf.nn.relu(tf.matmul(drop_out, w5) + b5)
drop_out = tf.nn.dropout(layer_3, keep_prob3) # DROP-OUT here
return tf.matmul(drop_out, w6) + b6
# Prediction:
tf_pred = tf.nn.softmax(logits(tf_data))
#We use the categorical cross entropy loss for training the model.
tf_loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=logits(tf_data),
labels=tf_labels))
tf_accuracy = 100*tf.reduce_mean(tf.to_float(tf.equal(tf.argmax(tf_pred, 1), tf.argmax(tf_labels, 1))))
#tf_opt = tf.train.AdadeltaOptimizer()
LR = 0.001 # Learning rate
tf_opt = tf.train.GradientDescentOptimizer(LR)
tf_step = tf_opt.minimize(tf_loss)
EPOCHS =100
STEPS = int(train_data.shape[0]/BATCH) # Number of steps to run
update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
init = tf.global_variables_initializer()
session = tf.Session()
session.run(init)
ss = ShuffleSplit(n_splits=STEPS, train_size=BATCH)
ss.get_n_splits(train_data, train_labels)
history = [(0, np.nan, 2)] # Initial Error Measures
for j in range(0,EPOCHS):
for step, (idx, _) in enumerate(ss.split(train_data,train_labels), start=1):
fd = {tf_data:train_data[idx], tf_labels:train_labels[idx]}
session.run(tf_step, feed_dict=fd)
fd = {tf_data:valid_data, tf_labels:valid_labels}
valid_loss, valid_accuracy = session.run([tf_loss, tf_accuracy], feed_dict=fd)
history.append((step, valid_loss, valid_accuracy))
print('EPOCHS %i \t Valid. Acc. = %f'%(j, valid_accuracy), end='\n')
steps, loss, acc = zip(*history)
即使經過大量步驟,它也會陷入相同的精度。
Step 5 Valid. Acc. = 47.000000
Step 10 Valid. Acc. = 52.999996
Step 15 Valid. Acc. = 47.000000
Step 20 Valid. Acc. = 47.000000
Step 25 Valid. Acc. = 47.000000
Step 30 Valid. Acc. = 47.000000
Step 35 Valid. Acc. = 47.000000
Step 40 Valid. Acc. = 47.000000
Step 45 Valid. Acc. = 47.000000
Step 50 Valid. Acc. = 47.000000
Step 55 Valid. Acc. = 47.000000
Step 60 Valid. Acc. = 47.000000
Step 65 Valid. Acc. = 47.000000
Step 70 Valid. Acc. = 47.000000
Step 75 Valid. Acc. = 47.000000
Step 80 Valid. Acc. = 47.000000
Step 85 Valid. Acc. = 47.000000
Step 90 Valid. Acc. = 47.000000
Step 95 Valid. Acc. = 47.000000
Step 100 Valid. Acc. = 47.000000
Step 105 Valid. Acc. = 47.000000
Step 110 Valid. Acc. = 47.000000
Step 115 Valid. Acc. = 47.000000
Step 120 Valid. Acc. = 47.000000
這很可能是因為您在權重初始化中使用的stddev=0.1
實際上是一個巨大的值。
嘗試使用stddev=0.01
(或者甚至 0.001,如果之前的值不起作用)。 您還應該將偏差初始化為 0,而不是1.0
。
在 Keras 中通常沒有類似的問題,因為默認使用(極好的) glorot_uniform
初始化程序。
聲明:本站的技術帖子網頁,遵循CC BY-SA 4.0協議,如果您需要轉載,請注明本站網址或者原文地址。任何問題請咨詢:yoyou2525@163.com.