
Batch normalization layer in Tensorflow is not updating its moving mean and moving variance

Batch normalization is not saving its moving mean and moving variance.

When I train, I get perfect overfitting of my training data (as expected). With batch normalization, training is also faster, again as expected. However, when I run the same model on the same data immediately after the training step, with 'is_training' = False, it gives very poor results. Moreover, every time I inspect moving_mean and moving_variance, they are still at their default values. They never update.

(u'main/y/y/moving_mean:0', array([ 0.,  0.], dtype=float32))
(u'main/y/y/moving_variance:0', array([ 1.,  1.], dtype=float32))
(u'main/y/y/moving_mean:0', array([ 0.,  0.], dtype=float32))
(u'main/y/y/moving_variance:0', array([ 1.,  1.], dtype=float32))
700 with generated means (training = true} 1.0 with saved means {training = false} 0.4911

I have the update_ops code below, but it does not seem to do the trick. Setting updates_collections=None makes it work, but I have been told that is a suboptimal solution for performance reasons.

update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
if update_ops:
    updates = tf.group(*update_ops)
    cost = with_dependencies([updates], cost)
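
For reference, the updates_collections=None workaround mentioned above would be passed through normalizer_params, so that batch_norm applies the moving-average update as part of the forward pass instead of deferring it to the UPDATE_OPS collection. This is a sketch of that option, not code from the question:

output = fully_connected(inputs,
                         num_classes * 2,
                         activation_fn=None,
                         normalizer_fn=batch_norm,
                         # updates_collections=None forces the moving statistics
                         # to be updated inline, which is why it is slower.
                         normalizer_params={'is_training': is_training,
                                            'updates_collections': None,
                                            'reuse': reuse,
                                            'scope': 'y'},
                         reuse=reuse,
                         scope='y')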

My code is below:

import numpy as np
import tensorflow as tf
from tensorflow.contrib.layers import fully_connected, softmax, batch_norm
from tensorflow.python.ops.control_flow_ops import with_dependencies
from tensorflow.python.training.adam import AdamOptimizer

batch_size = 100
input_size = 10
noise_strength = 4

class Data(object):
    def __init__(self,obs,gold):
        self.obs=obs
        self.gold=gold

def generate_data(batch_size,input_size,noise_strength):
    input = np.random.rand(batch_size, input_size) * noise_strength
    gold = np.random.randint(0, 2, (input_size,1))
    input = input + gold
    return Data(input,gold)


def ffnn_model(inputs,num_classes,batch_size,is_training,reuse=False):
    output = fully_connected(inputs,
                             num_classes * 2,
                             activation_fn=None,
                             normalizer_fn=batch_norm,
                             normalizer_params={'is_training': is_training, 'reuse': reuse, 'scope': 'y'},
                             reuse=reuse,
                             scope='y'
                             )
    y = softmax(tf.reshape(output, [batch_size, num_classes, 2]))
    return y


#objective function
def objective_function(y,gold):
    indices = tf.stack([tf.range(tf.size(gold)),tf.reshape(gold,[-1])],axis=1)
    scores = tf.gather_nd(tf.reshape(y,[-1,2]),indices=indices)
    # return tf.cast(indices,tf.float32),-tf.reduce_mean(tf.log(scores+1e-6))
    return -tf.reduce_mean(tf.log(scores+1e-6))

def train_op(y,gold):
    cost = objective_function(y,gold)
    update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
    if update_ops:
        print "yes to update_ops"
        print update_ops
        updates = tf.group(*update_ops)
        cost = with_dependencies([updates], cost)
    train_step = AdamOptimizer().minimize(cost)

    return train_step

def predictions_op(y):
    return tf.cast(tf.argmax(y, axis=len(y.get_shape()) - 1), dtype=tf.int32)

def accuracy_op(y,gold):
    return tf.reduce_mean(tf.cast(tf.equal(predictions_op(y), gold),tf.float32))

def model(batch_size, num_classes, input_size, scope, reuse):
    with tf.variable_scope(scope) as m:
        if reuse:
            m.reuse_variables()
        is_training = tf.placeholder(tf.bool)

        x = tf.placeholder(tf.float32, shape=[batch_size, input_size])

        y = ffnn_model(x, num_classes=1, batch_size=batch_size, is_training=is_training, reuse=reuse)

        g = tf.placeholder(tf.int32, shape=[batch_size, num_classes])

        return g, x, y, is_training

def train(batch_size=100,input_size = 100):
    scope = "main"

    g, x, y, is_training = model(batch_size, 1, input_size, scope,reuse=None )

    with tf.Session() as sess:
        train_step, accuracy,predictions = train_op(y, g), accuracy_op(y, g), predictions_op(y)
        cost_op = objective_function(y,g)
        init_op = tf.group(tf.local_variables_initializer(), tf.global_variables_initializer())
        sess.run(init_op)
        accs = []
        accs2 = []
        costs = []
        for i in range(10000):
            data = generate_data(batch_size, input_size, noise_strength)
            _,acc,cost = sess.run([train_step,accuracy,cost_op],feed_dict={x:data.obs,g:data.gold,is_training:True})
            acc2 = sess.run(accuracy, feed_dict={x: data.obs, g: data.gold, is_training: False})
            accs.append(acc)
            accs2.append(acc2)
            costs.append(cost)
            if i%100 == 0:
                # print scurrs
                print i,"with generated means (training = true}",np.mean(accs[-100:]),"with saved means {training = false}",np.mean(accs2[-100:])
                # print sess.run(predictions, feed_dict={x: data.obs, g: data.gold, is_training: False})
                vars = [var for var in tf.global_variables() if 'moving' in var.name]

                rv = sess.run(vars, {is_training: False})
                rt = sess.run(vars, {is_training: True})

                print"\t".join([str((v.name, a)) for a, v in zip(rv, vars)]), \
                    "\n", \
                    "\t".join([str((v.name, a)) for a, v in zip(rt, vars)])


if __name__ == "__main__":
    train()

Batch normalization creates ops that must be run for the moving values to be updated. That is, it adds them to a specific collection, and if you use the tf.contrib.layers.optimize_loss function, it collects those ops for you and runs them whenever the training op runs.

So to fix it, replace:

    train_step = AdamOptimizer().minimize(cost)

with:

    train_step = optimize_loss(loss, step, learning_rate, optimizer='ADAM')
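
Applied to the question's train_op, the same effect can also be had without optimize_loss by making the optimizer step explicitly depend on the collected update ops. This is a minimal sketch of that widely used alternative, not code from the original answer:

def train_op(y, gold):
    cost = objective_function(y, gold)
    # batch_norm adds its moving_mean / moving_variance update ops to
    # tf.GraphKeys.UPDATE_OPS; run them on every optimizer step.
    update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
    with tf.control_dependencies(update_ops):
        train_step = AdamOptimizer().minimize(cost)
    return train_step

Either way, the key point is that the update ops must be wired into the graph that sess.run actually executes; merely having them in the collection is not enough.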
