TensorFlow seq2seq解码器不起作用

Question

I'm trying to follow the seq2seq example at https://www.tensorflow.org/tutorials/seq2seq, with the difference that my inputs and outputs are real continuous values rather than words as in the example. 我正在尝试遵循https://www.tensorflow.org/tutorials/seq2seq中的seq2seq示例，但不同之处在于，我的输入和输出是真实的连续值，而不是示例中的单词。
I have two tensors X and Y which are basic examples of the input and output placeholders. 我有两个张量X和Y，它们是输入和输出占位符的基本示例。 Here's the code that I have right now: 这是我现在拥有的代码：

import tensorflow as tf
from tensorflow.contrib import rnn 
import numpy as np
from tensorflow.python.layers.core import Dense

# Minimal encoder/decoder built with tf.contrib.seq2seq on random continuous
# data. This is the version that raises the shape error quoted below it.

# Number of units passed to both LSTM cells.
N_HIDDEN = 50

# NOTE(review): the session is created but never used in this snippet.
sess = tf.Session()

# Random float64 arrays of shape (3, 10, 8); Y is X plus small Gaussian noise.
# Together with X_lengths below, the layout is (batch=3, time=10, features=8).
X = np.random.randn(3, 10, 8)
Y = X + 0.1*np.random.randn(3, 10, 8)

# One length per batch element; every sequence uses all 10 time steps.
X_lengths = [10, 10, 10] 
# NOTE(review): BATCH_SIZE is defined but not referenced anywhere below.
BATCH_SIZE = 3

with tf.variable_scope("myrnn", reuse=tf.AUTO_REUSE) as scope:

    encoder_cell = rnn.BasicLSTMCell(N_HIDDEN) 

    # time_major is not passed here, so X is consumed in batch-major layout.
    encoder_outputs, encoder_state = tf.nn.dynamic_rnn(
        encoder_cell, X, sequence_length=X_lengths, dtype=tf.float64)


    # encoder_outputs: shape=(3, 10, 50)
    # encoder_state: presumably an LSTMStateTuple (c, h), each of shape
    # (3, 50), rather than a single (3, 50) tensor -- TODO confirm.

    decoder_cell = rnn.BasicLSTMCell(N_HIDDEN)

    # Helper
    # NOTE(review): time_major=True declares Y to be laid out as
    # [time, batch, features], but Y was built as (3, 10, 8), i.e. batch-major.
    # The helper therefore yields a [10, 8] slice per step, which cannot be
    # concatenated with the [3, 50] decoder state -- this matches the
    # "input shapes: [10,8], [3,50]" message in the reported error.
    helper = tf.contrib.seq2seq.TrainingHelper(
     Y, sequence_length=X_lengths, time_major=True)

    # Decoder
    # The encoder's final state is used as the decoder's initial state.
    basic_decoder = tf.contrib.seq2seq.BasicDecoder(decoder_cell, helper, encoder_state)

    # Dynamic decoding
    # NOTE(review): in TF 1.x contrib.seq2seq, dynamic_decode appears to return
    # three values (outputs, final state, final sequence lengths); unpacking
    # into two names would fail once the shape error is resolved -- verify
    # against the installed version.
    outputs, _ = tf.contrib.seq2seq.dynamic_decode(basic_decoder)

But I'm getting the following error on the last line: 但是我在最后一行收到以下错误：

---------------------------------------------------------------------------
InvalidArgumentError                      Traceback (most recent call last)
c:\users\mntiv\anaconda3\envs\tensorflow\lib\site-packages\tensorflow\python\framework\common_shapes.py in _call_cpp_shape_fn_impl(op, input_tensors_needed, input_tensors_as_shapes_needed, require_shape_fn)
    685           graph_def_version, node_def_str, input_shapes, input_tensors,
--> 686           input_tensors_as_shapes, status)
    687   except errors.InvalidArgumentError as err:

c:\users\mntiv\anaconda3\envs\tensorflow\lib\site-packages\tensorflow\python\framework\errors_impl.py in __exit__(self, type_arg, value_arg, traceback_arg)
    515             compat.as_text(c_api.TF_Message(self.status.status)),
--> 516             c_api.TF_GetCode(self.status.status))
    517     # Delete the underlying status object from memory otherwise it stays alive

InvalidArgumentError: Dimension 0 in both shapes must be equal, but are 10 and 3. Shapes are [10] and [3]. for 'myrnn_14/decoder/while/BasicDecoderStep/basic_lstm_cell/concat' (op: 'ConcatV2') with input shapes: [10,8], [3,50], [] and with computed input tensors: input[2] = <1>.

During handling of the above exception, another exception occurred:

ValueError                                Traceback (most recent call last)
<ipython-input-20-5d7a4b10734c> in <module>()
     37 
     38     # Dynamic decoding
---> 39     outputs, _ = tf.contrib.seq2seq.dynamic_decode(basic_decoder)

c:\users\mntiv\anaconda3\envs\tensorflow\lib\site-packages\tensorflow\contrib\seq2seq\python\ops\decoder.py in dynamic_decode(decoder, output_time_major, impute_finished, maximum_iterations, parallel_iterations, swap_memory, scope)
    307         ],
    308         parallel_iterations=parallel_iterations,
--> 309         swap_memory=swap_memory)
    310 
    311     final_outputs_ta = res[1]

c:\users\mntiv\anaconda3\envs\tensorflow\lib\site-packages\tensorflow\python\ops\control_flow_ops.py in while_loop(cond, body, loop_vars, shape_invariants, parallel_iterations, back_prop, swap_memory, name, maximum_iterations)
   3094         swap_memory=swap_memory)
   3095     ops.add_to_collection(ops.GraphKeys.WHILE_CONTEXT, loop_context)
-> 3096     result = loop_context.BuildLoop(cond, body, loop_vars, shape_invariants)
   3097     if maximum_iterations is not None:
   3098       return result[1]

c:\users\mntiv\anaconda3\envs\tensorflow\lib\site-packages\tensorflow\python\ops\control_flow_ops.py in BuildLoop(self, pred, body, loop_vars, shape_invariants)
   2872       self.Enter()
   2873       original_body_result, exit_vars = self._BuildLoop(
-> 2874           pred, body, original_loop_vars, loop_vars, shape_invariants)
   2875     finally:
   2876       self.Exit()

c:\users\mntiv\anaconda3\envs\tensorflow\lib\site-packages\tensorflow\python\ops\control_flow_ops.py in _BuildLoop(self, pred, body, original_loop_vars, loop_vars, shape_invariants)
   2812         flat_sequence=vars_for_body_with_tensor_arrays)
   2813     pre_summaries = ops.get_collection(ops.GraphKeys._SUMMARY_COLLECTION)  # pylint: disable=protected-access
-> 2814     body_result = body(*packed_vars_for_body)
   2815     post_summaries = ops.get_collection(ops.GraphKeys._SUMMARY_COLLECTION)  # pylint: disable=protected-access
   2816     if not nest.is_sequence(body_result):

c:\users\mntiv\anaconda3\envs\tensorflow\lib\site-packages\tensorflow\contrib\seq2seq\python\ops\decoder.py in body(time, outputs_ta, state, inputs, finished, sequence_lengths)
    252       """
    253       (next_outputs, decoder_state, next_inputs,
--> 254        decoder_finished) = decoder.step(time, inputs, state)
    255       if decoder.tracks_own_finished:
    256         next_finished = decoder_finished

c:\users\mntiv\anaconda3\envs\tensorflow\lib\site-packages\tensorflow\contrib\seq2seq\python\ops\basic_decoder.py in step(self, time, inputs, state, name)
    136     """
    137     with ops.name_scope(name, "BasicDecoderStep", (time, inputs, state)):
--> 138       cell_outputs, cell_state = self._cell(inputs, state)
    139       if self._output_layer is not None:
    140         cell_outputs = self._output_layer(cell_outputs)

c:\users\mntiv\anaconda3\envs\tensorflow\lib\site-packages\tensorflow\python\ops\rnn_cell_impl.py in __call__(self, inputs, state, scope, *args, **kwargs)
    294     # method.  See the class docstring for more details.
    295     return base_layer.Layer.__call__(self, inputs, state, scope=scope,
--> 296                                      *args, **kwargs)
    297 
    298 

c:\users\mntiv\anaconda3\envs\tensorflow\lib\site-packages\tensorflow\python\layers\base.py in __call__(self, inputs, *args, **kwargs)
    694 
    695         if not in_deferred_mode:
--> 696           outputs = self.call(inputs, *args, **kwargs)
    697           if outputs is None:
    698             raise ValueError('A layer\'s `call` method should return a Tensor '

c:\users\mntiv\anaconda3\envs\tensorflow\lib\site-packages\tensorflow\python\ops\rnn_cell_impl.py in call(self, inputs, state)
    575 
    576     gate_inputs = math_ops.matmul(
--> 577         array_ops.concat([inputs, h], 1), self._kernel)
    578     gate_inputs = nn_ops.bias_add(gate_inputs, self._bias)
    579 

c:\users\mntiv\anaconda3\envs\tensorflow\lib\site-packages\tensorflow\python\ops\array_ops.py in concat(values, axis, name)
   1173               tensor_shape.scalar())
   1174       return identity(values[0], name=scope)
-> 1175   return gen_array_ops._concat_v2(values=values, axis=axis, name=name)
   1176 
   1177 

c:\users\mntiv\anaconda3\envs\tensorflow\lib\site-packages\tensorflow\python\ops\gen_array_ops.py in _concat_v2(values, axis, name)
    775   if _ctx.in_graph_mode():
    776     _, _, _op = _op_def_lib._apply_op_helper(
--> 777         "ConcatV2", values=values, axis=axis, name=name)
    778     _result = _op.outputs[:]
    779     _inputs_flat = _op.inputs

c:\users\mntiv\anaconda3\envs\tensorflow\lib\site-packages\tensorflow\python\framework\op_def_library.py in _apply_op_helper(self, op_type_name, name, **keywords)
    785         op = g.create_op(op_type_name, inputs, output_types, name=scope,
    786                          input_types=input_types, attrs=attr_protos,
--> 787                          op_def=op_def)
    788       return output_structure, op_def.is_stateful, op
    789 

c:\users\mntiv\anaconda3\envs\tensorflow\lib\site-packages\tensorflow\python\framework\ops.py in create_op(self, op_type, inputs, dtypes, input_types, name, attrs, op_def, compute_shapes, compute_device)
   3271         op_def=op_def)
   3272     self._create_op_helper(ret, compute_shapes=compute_shapes,
-> 3273                            compute_device=compute_device)
   3274     return ret
   3275 

c:\users\mntiv\anaconda3\envs\tensorflow\lib\site-packages\tensorflow\python\framework\ops.py in _create_op_helper(self, op, compute_shapes, compute_device)
   3311     # compute_shapes argument.
   3312     if op._c_op or compute_shapes:  # pylint: disable=protected-access
-> 3313       set_shapes_for_outputs(op)
   3314     # TODO(b/XXXX): move to Operation.__init__ once _USE_C_API flag is removed.
   3315     self._add_op(op)

c:\users\mntiv\anaconda3\envs\tensorflow\lib\site-packages\tensorflow\python\framework\ops.py in set_shapes_for_outputs(op)
   2499     return _set_shapes_for_outputs_c_api(op)
   2500   else:
-> 2501     return _set_shapes_for_outputs(op)
   2502 
   2503 

c:\users\mntiv\anaconda3\envs\tensorflow\lib\site-packages\tensorflow\python\framework\ops.py in _set_shapes_for_outputs(op)
   2472       shape_func = _call_cpp_shape_fn_and_require_op
   2473 
-> 2474   shapes = shape_func(op)
   2475   if shapes is None:
   2476     raise RuntimeError(

c:\users\mntiv\anaconda3\envs\tensorflow\lib\site-packages\tensorflow\python\framework\ops.py in call_with_requiring(op)
   2402 
   2403   def call_with_requiring(op):
-> 2404     return call_cpp_shape_fn(op, require_shape_fn=True)
   2405 
   2406   _call_cpp_shape_fn_and_require_op = call_with_requiring

c:\users\mntiv\anaconda3\envs\tensorflow\lib\site-packages\tensorflow\python\framework\common_shapes.py in call_cpp_shape_fn(op, require_shape_fn)
    625     res = _call_cpp_shape_fn_impl(op, input_tensors_needed,
    626                                   input_tensors_as_shapes_needed,
--> 627                                   require_shape_fn)
    628     if not isinstance(res, dict):
    629       # Handles the case where _call_cpp_shape_fn_impl calls unknown_shape(op).

c:\users\mntiv\anaconda3\envs\tensorflow\lib\site-packages\tensorflow\python\framework\common_shapes.py in _call_cpp_shape_fn_impl(op, input_tensors_needed, input_tensors_as_shapes_needed, require_shape_fn)
    689       missing_shape_fn = True
    690     else:
--> 691       raise ValueError(err.message)
    692 
    693   if missing_shape_fn:

ValueError: Dimension 0 in both shapes must be equal, but are 10 and 3. Shapes are [10] and [3]. for 'myrnn_14/decoder/while/BasicDecoderStep/basic_lstm_cell/concat' (op: 'ConcatV2') with input shapes: [10,8], [3,50], [] and with computed input tensors: input[2] = <1>.

As you can see, there's a dimension issue and I can't figure it out. 如您所见，存在尺寸问题，我无法解决。 What am I doing wrong? 我究竟做错了什么？

Also, I can't really understand the purpose of the helper class (which may possibly be the mistake I'm making here). 另外，我不能真正理解辅助程序类的目的（这可能是我在这里犯的错误）。 Any explanation is appreciated. 任何解释表示赞赏。

Answer 1

I found a similar question How to use tf.contrib.seq2seq.Helper for non-embedding data? 我发现了一个类似的问题，如何将tf.contrib.seq2seq.Helper用于非嵌入数据？ and made a few changes to my code and it seems that it is working. 并对我的代码进行了一些更改，看来它可以正常工作。 Not sure if this is going to work when I feed the training data. 当我输入训练数据时，不确定这是否行得通。 But here's the code that worked for me so far. 但这是到目前为止对我有用的代码。 This is tested in tensorflow 1.6.0. 这在tensorflow 1.6.0中进行了测试。

import tensorflow as tf
import numpy as np
from tensorflow.python.layers import core as layers_core

# Sequence-to-sequence regression graph for continuous (non-embedded) data,
# built with the TF 1.x tf.contrib.seq2seq API.
#
# Feed `enc_input`, `target` and `learning_`; run `optimizer` to take one
# training step and `loss` to read the current loss.

input_seq_len = 10  # number of time steps in each input sequence
input_dim = 8  # number of features per input time step

output_seq_len = 10  # number of time steps in each target sequence
output_dim = 8  # number of features per target time step

encoder_units = 50  # LSTM units in the encoder cell
# LSTM units in the decoder cell. Must equal encoder_units because the
# encoder's final state is passed unchanged as the decoder's initial state.
decoder_units = 50

# Batch size used when feeding data. The graph itself no longer depends on
# this value: the batch dimension is read from the fed tensors at run time.
batch_size = 3

graph = tf.Graph()
with graph.as_default():
    # Learning rate for the optimizer, supplied through feed_dict.
    learning_ = tf.placeholder(tf.float32)

    with tf.variable_scope('Seq2Seq'):

        # Encoder input: [batch, input_seq_len, input_dim]
        enc_input = tf.placeholder(
            tf.float32, [None, input_seq_len, input_dim])

        # Regression targets: [batch, output_seq_len, output_dim]
        target = tf.placeholder(
            tf.float32, [None, output_seq_len, output_dim])

        # The placeholders leave the batch dimension open, so derive it from
        # the data instead of hard-coding `batch_size` into the graph.
        dynamic_batch_size = tf.shape(enc_input)[0]

        ### THE ENCODER

        encoder_cell = tf.nn.rnn_cell.BasicLSTMCell(encoder_units)

        initial_state = encoder_cell.zero_state(
            dynamic_batch_size, dtype=tf.float32)

        # encoder_outputs: [batch, input_seq_len, encoder_units]
        # encoder_state:   LSTMStateTuple (c, h), each [batch, encoder_units]
        encoder_outputs, encoder_state = tf.nn.dynamic_rnn(
            encoder_cell, enc_input, initial_state=initial_state)

        ### THE DECODER

        # Dense layer projecting decoder_units -> output_dim at every step.
        projection = layers_core.Dense(
            output_dim, use_bias=True, name="output_projection")

        # Teacher forcing: at step t the decoder must see the target of step
        # t-1, not the value it is asked to predict. Feeding `target` itself
        # would let the network reach zero loss by copying its input. Shift
        # the targets right by one step behind an all-zero "go" frame.
        go_frame = tf.zeros_like(target[:, :1, :])
        decoder_input = tf.concat([go_frame, target[:, :-1, :]], axis=1)

        helper = tf.contrib.seq2seq.TrainingHelper(
            decoder_input,
            sequence_length=tf.fill([dynamic_batch_size], output_seq_len))

        decoder_cell = tf.nn.rnn_cell.BasicLSTMCell(decoder_units)

        decoder = tf.contrib.seq2seq.BasicDecoder(
            decoder_cell,
            initial_state=encoder_state,
            helper=helper,
            output_layer=projection)

        # dynamic_decode returns (outputs, final_state, final_seq_lengths).
        outputs, _, _ = tf.contrib.seq2seq.dynamic_decode(decoder=decoder)

        # Half squared error summed over features, then averaged over time
        # steps and over the batch. `rnn_output` holds the projected values.
        diff = tf.square(outputs.rnn_output - target)
        loss = 0.5 * tf.reduce_sum(diff, -1)
        loss = tf.reduce_mean(loss, 1)
        loss = tf.reduce_mean(loss)

        # Despite its name, this is the training op returned by minimize().
        optimizer = tf.train.AdamOptimizer(learning_).minimize(loss)

TensorFlow seq2seq解码器不起作用

问题描述

1 个解决方案

解决方案1
0 2018-04-25 19:56:52

TensorFlow seq2seq解码器不起作用

问题描述

1 个解决方案

解决方案1 0 2018-04-25 19:56:52

解决方案1
0 2018-04-25 19:56:52