
TensorFlow cost is NaN when training an autoencoder

I am training an autoencoder with TensorFlow, but the cost becomes NaN. I modified the learning rate and the optimizer, but it did not help. Search results suggest that decreasing the learning rate may fix it, but even after changing the learning rate to 0.00001 the cost is still NaN. Here is my parameter code:

import tensorflow as tf

learning_rate = 0.00001
training_epochs = 2
batch_size = 900
display_step = 1
examples_to_show = 10
nextbatch = 0
# network parameters
n_input = 500
# tf Graph input  
X = tf.placeholder("float",[None,n_input])                                

# hidden layer setting                                                    
n_hidden_1 = 400 # 1st layer num features
n_hidden_2 = 300 # 2nd layer num features
n_hidden_3 = 200 # 3rd layer num features
n_hidden_4 = 100 # 4th layer num features
weights = {
    'encoder_h1': tf.Variable(tf.random_normal([n_input, n_hidden_1])),    # 500 * 400
    'encoder_h2': tf.Variable(tf.random_normal([n_hidden_1, n_hidden_2])), # 400 * 300
    'encoder_h3': tf.Variable(tf.random_normal([n_hidden_2, n_hidden_3])), # 300 * 200
    'encoder_h4': tf.Variable(tf.random_normal([n_hidden_3, n_hidden_4])), # 200 * 100

    'decoder_h1': tf.Variable(tf.random_normal([n_hidden_4, n_hidden_3])), # 100 * 200
    'decoder_h2': tf.Variable(tf.random_normal([n_hidden_3, n_hidden_2])), # 200 * 300 
    'decoder_h3': tf.Variable(tf.random_normal([n_hidden_2, n_hidden_1])), # 300 * 400
    'decoder_h4': tf.Variable(tf.random_normal([n_hidden_1, n_input])),    # 400 * 500 
}
biases = {
    'encoder_b1': tf.Variable(tf.random_normal([n_hidden_1])),  # 400
    'encoder_b2': tf.Variable(tf.random_normal([n_hidden_2])),  # 300
    'encoder_b3': tf.Variable(tf.random_normal([n_hidden_3])),  # 200
    'encoder_b4': tf.Variable(tf.random_normal([n_hidden_4])),  # 100

    'decoder_b1': tf.Variable(tf.random_normal([n_hidden_3])),  # 200
    'decoder_b2': tf.Variable(tf.random_normal([n_hidden_2])),  # 300
    'decoder_b3': tf.Variable(tf.random_normal([n_hidden_1])),  # 400
    'decoder_b4': tf.Variable(tf.random_normal([n_input])),     # 500
}

# Building the encoder
def encoder(x):
    print("i am encoder")
    layer_1 = tf.nn.sigmoid(tf.add(tf.matmul(x, weights['encoder_h1']), biases['encoder_b1']))
    layer_2 = tf.nn.sigmoid(tf.add(tf.matmul(layer_1, weights['encoder_h2']), biases['encoder_b2']))
    layer_3 = tf.nn.sigmoid(tf.add(tf.matmul(layer_2, weights['encoder_h3']), biases['encoder_b3']))
    layer_4 = tf.nn.sigmoid(tf.add(tf.matmul(layer_3, weights['encoder_h4']), biases['encoder_b4']))
    return layer_4

# Building the decoder
def decoder(x):
    layer_1 = tf.nn.sigmoid(tf.add(tf.matmul(x, weights['decoder_h1']), biases['decoder_b1']))
    print("layer1:", layer_1)
    layer_2 = tf.nn.sigmoid(tf.add(tf.matmul(layer_1, weights['decoder_h2']), biases['decoder_b2']))
    print("layer2:", layer_2)
    layer_3 = tf.nn.sigmoid(tf.add(tf.matmul(layer_2, weights['decoder_h3']), biases['decoder_b3']))
    print("layer3:", layer_3)
    layer_4 = tf.nn.sigmoid(tf.add(tf.matmul(layer_3, weights['decoder_h4']), biases['decoder_b4']))
    print("layer4:", layer_4)
    return layer_4

def normalize(x):
    amin,amax = x.min(),x.max()
    x = (x-amin)/(amax - amin)
    return x
def main():
    # Construct model
    encoder_op = encoder(X)
    print("encoder_op:", encoder_op)
    decoder_op = decoder(encoder_op)
    print("decoder_op:", decoder_op)
    # Prediction
    y_pred = decoder_op
    # Targets (Labels) are the input data.
    y_true = X

    # Define loss and optimizer, minimize the squared error
    #cost = tf.reduce_mean(tf.pow(y_true - y_pred, 2))
    #cost = tf.reduce_sum(tf.pow((y_true - y_pred), 2))
    cost = tf.reduce_mean(tf.squared_difference(y_true, y_pred))
    #optimizer = tf.train.AdamOptimizer(learning_rate).minimize(cost)
    #optimizer = tf.train.GradientDescentOptimizer(learning_rate).minimize(cost)
    optimizer = tf.train.AdagradOptimizer(learning_rate).minimize(cost)

    # Launch the graph
    with tf.Session() as sess:
        if int((tf.__version__).split('.')[1]) < 12 and int((tf.__version__).split('.')[0]) < 1:
            init = tf.initialize_all_variables()
        else:
            init = tf.global_variables_initializer()
        sess.run(init)
        lenx_train = len(loadvector("x", "train"))
        total_batch = int(lenx_train / batch_size)

        # Training cycle
        for epoch in range(training_epochs):
            # Loop over all batches
            for i in range(total_batch):
                batch_xs, batch_ys = get_next_batch(batch_size)  # max(x) = 1, min(x) = 0
                # Run optimization op (backprop) and cost op (to get loss value)
                op, c = sess.run([optimizer, cost], feed_dict={X: batch_xs})
            # Display logs per epoch step
            if epoch % display_step == 0:
                print("Epoch:", '%04d' % (epoch + 1), "cost=", c)
                print("op:", '%04d' % (epoch + 1), "op=", op)
    print("Optimization Finished!")
if __name__ == "__main__":
    x_train= loadvector("x","train")
    #x_train = scale(x_train)
    x_train = normalize(x_train)
    y_train = loadvector("y", "train")
    main()

Here is the result when I run it:

Epoch: 0001 cost= 0.373359
op: 0001 op= None
Epoch: 0002 cost= nan
op: 0002 op= None
Optimization Finished!

It is hard to tell for sure without the data, but a possible reason is your implementation of normalize(). If you happen to get an array of equal values, amax - amin is zero, the division produces a bunch of NaNs, and they propagate through the whole network.
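A minimal guard for that case, keeping the question's normalize() signature, is to add a small epsilon to the denominator (the epsilon value here is an arbitrary choice):

import numpy as np

def normalize(x, eps=1e-8):
    # Guard against amax == amin (a constant array), which would divide by zero
    amin, amax = x.min(), x.max()
    return (x - amin) / (amax - amin + eps)

print(normalize(np.ones((2, 3))))  # all zeros instead of NaN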

In general, you can take a look at the second answer in How does one debug NaN values in TensorFlow?, which suggests using tf.add_check_numerics_ops(). This should catch numeric issues pretty quickly in most cases. You can also take a look at tfdbg.
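A sketch of how that fits into the training loop, assuming TensorFlow 1.x graph mode and reusing the names X, optimizer, cost, and batch_xs from the question's code:

# After the graph is built, add assertions for every floating-point tensor in it.
check_op = tf.add_check_numerics_ops()

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    # Running check_op alongside the training step raises an InvalidArgumentError
    # that names the first op whose output becomes NaN or Inf.
    _, c, _ = sess.run([optimizer, cost, check_op], feed_dict={X: batch_xs})

The error message points at the offending op, which usually makes it clear whether the NaN first appears in the input pipeline (e.g. normalize()) or inside the network itself.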
