简体   繁体   中英

TensorFlow seq2seq decoder does not work

I'm trying to follow the seq2seq example in https://www.tensorflow.org/tutorials/seq2seq, with the difference that my inputs and outputs are real continuous values as opposed to the words used in the example.
I have two tensors X and Y which are basic examples of the input and output placeholders. Here's the code that I have right now:

import tensorflow as tf
from tensorflow.contrib import rnn 
import numpy as np
from tensorflow.python.layers.core import Dense

# Minimal reproduction: an LSTM encoder over X whose final state seeds a
# BasicDecoder driven by a TrainingHelper over Y. Graph construction fails at
# the final dynamic_decode call with a ConcatV2 shape error.

# Number of units used for both the encoder and the decoder LSTM cell.
N_HIDDEN = 50

# Session is created here but not used within this snippet.
sess = tf.Session()

# Toy data: Y is X plus small Gaussian noise, both of shape (3, 10, 8).
# Presumably laid out as (batch, time, features), since X_lengths below has
# three entries of 10 and dynamic_rnn is called without time_major.
X = np.random.randn(3, 10, 8)
Y = X + 0.1*np.random.randn(3, 10, 8)

# One sequence length per example; every sequence uses all 10 steps.
X_lengths = [10, 10, 10] 
# Not referenced anywhere else in this snippet.
BATCH_SIZE = 3

with tf.variable_scope("myrnn", reuse=tf.AUTO_REUSE) as scope:

    encoder_cell = rnn.BasicLSTMCell(N_HIDDEN) 

    # X is a float64 NumPy array, hence dtype=tf.float64.
    encoder_outputs, encoder_state = tf.nn.dynamic_rnn(
        encoder_cell, X, sequence_length=X_lengths, dtype=tf.float64)


    # encoder_outputs: shape=(3, 10, 50)
    # encoder_state: presumably an LSTM (c, h) state pair, each of
    # shape=(3, 50) - TODO confirm

    decoder_cell = rnn.BasicLSTMCell(N_HIDDEN)

    # Helper: supplies Y to the decoder as its per-step input.
    # NOTE(review): time_major=True declares Y as (time, batch, features),
    # but Y was built as (3, 10, 8) like the batch-major encoder input. The
    # helper then appears to hand out per-step inputs of shape (10, 8), which
    # cannot be concatenated with the (3, 50) decoder state - this matches
    # the "[10,8], [3,50]" shapes in the reported error. Confirm against the
    # TrainingHelper documentation.
    helper = tf.contrib.seq2seq.TrainingHelper(
     Y, sequence_length=X_lengths, time_major=True)

    # Decoder: starts from the encoder's final state.
    basic_decoder = tf.contrib.seq2seq.BasicDecoder(decoder_cell, helper, encoder_state)

    # Dynamic decoding: this is the line that raises.
    # NOTE(review): dynamic_decode appears to return three values (outputs,
    # final state, final sequence lengths); if so, this two-target unpacking
    # would fail once the shape error is resolved - TODO confirm.
    outputs, _ = tf.contrib.seq2seq.dynamic_decode(basic_decoder)

But I'm getting the following error on the last line:

---------------------------------------------------------------------------
InvalidArgumentError                      Traceback (most recent call last)
c:\users\mntiv\anaconda3\envs\tensorflow\lib\site-packages\tensorflow\python\framework\common_shapes.py in _call_cpp_shape_fn_impl(op, input_tensors_needed, input_tensors_as_shapes_needed, require_shape_fn)
    685           graph_def_version, node_def_str, input_shapes, input_tensors,
--> 686           input_tensors_as_shapes, status)
    687   except errors.InvalidArgumentError as err:

c:\users\mntiv\anaconda3\envs\tensorflow\lib\site-packages\tensorflow\python\framework\errors_impl.py in __exit__(self, type_arg, value_arg, traceback_arg)
    515             compat.as_text(c_api.TF_Message(self.status.status)),
--> 516             c_api.TF_GetCode(self.status.status))
    517     # Delete the underlying status object from memory otherwise it stays alive

InvalidArgumentError: Dimension 0 in both shapes must be equal, but are 10 and 3. Shapes are [10] and [3]. for 'myrnn_14/decoder/while/BasicDecoderStep/basic_lstm_cell/concat' (op: 'ConcatV2') with input shapes: [10,8], [3,50], [] and with computed input tensors: input[2] = <1>.

During handling of the above exception, another exception occurred:

ValueError                                Traceback (most recent call last)
<ipython-input-20-5d7a4b10734c> in <module>()
     37 
     38     # Dynamic decoding
---> 39     outputs, _ = tf.contrib.seq2seq.dynamic_decode(basic_decoder)

c:\users\mntiv\anaconda3\envs\tensorflow\lib\site-packages\tensorflow\contrib\seq2seq\python\ops\decoder.py in dynamic_decode(decoder, output_time_major, impute_finished, maximum_iterations, parallel_iterations, swap_memory, scope)
    307         ],
    308         parallel_iterations=parallel_iterations,
--> 309         swap_memory=swap_memory)
    310 
    311     final_outputs_ta = res[1]

c:\users\mntiv\anaconda3\envs\tensorflow\lib\site-packages\tensorflow\python\ops\control_flow_ops.py in while_loop(cond, body, loop_vars, shape_invariants, parallel_iterations, back_prop, swap_memory, name, maximum_iterations)
   3094         swap_memory=swap_memory)
   3095     ops.add_to_collection(ops.GraphKeys.WHILE_CONTEXT, loop_context)
-> 3096     result = loop_context.BuildLoop(cond, body, loop_vars, shape_invariants)
   3097     if maximum_iterations is not None:
   3098       return result[1]

c:\users\mntiv\anaconda3\envs\tensorflow\lib\site-packages\tensorflow\python\ops\control_flow_ops.py in BuildLoop(self, pred, body, loop_vars, shape_invariants)
   2872       self.Enter()
   2873       original_body_result, exit_vars = self._BuildLoop(
-> 2874           pred, body, original_loop_vars, loop_vars, shape_invariants)
   2875     finally:
   2876       self.Exit()

c:\users\mntiv\anaconda3\envs\tensorflow\lib\site-packages\tensorflow\python\ops\control_flow_ops.py in _BuildLoop(self, pred, body, original_loop_vars, loop_vars, shape_invariants)
   2812         flat_sequence=vars_for_body_with_tensor_arrays)
   2813     pre_summaries = ops.get_collection(ops.GraphKeys._SUMMARY_COLLECTION)  # pylint: disable=protected-access
-> 2814     body_result = body(*packed_vars_for_body)
   2815     post_summaries = ops.get_collection(ops.GraphKeys._SUMMARY_COLLECTION)  # pylint: disable=protected-access
   2816     if not nest.is_sequence(body_result):

c:\users\mntiv\anaconda3\envs\tensorflow\lib\site-packages\tensorflow\contrib\seq2seq\python\ops\decoder.py in body(time, outputs_ta, state, inputs, finished, sequence_lengths)
    252       """
    253       (next_outputs, decoder_state, next_inputs,
--> 254        decoder_finished) = decoder.step(time, inputs, state)
    255       if decoder.tracks_own_finished:
    256         next_finished = decoder_finished

c:\users\mntiv\anaconda3\envs\tensorflow\lib\site-packages\tensorflow\contrib\seq2seq\python\ops\basic_decoder.py in step(self, time, inputs, state, name)
    136     """
    137     with ops.name_scope(name, "BasicDecoderStep", (time, inputs, state)):
--> 138       cell_outputs, cell_state = self._cell(inputs, state)
    139       if self._output_layer is not None:
    140         cell_outputs = self._output_layer(cell_outputs)

c:\users\mntiv\anaconda3\envs\tensorflow\lib\site-packages\tensorflow\python\ops\rnn_cell_impl.py in __call__(self, inputs, state, scope, *args, **kwargs)
    294     # method.  See the class docstring for more details.
    295     return base_layer.Layer.__call__(self, inputs, state, scope=scope,
--> 296                                      *args, **kwargs)
    297 
    298 

c:\users\mntiv\anaconda3\envs\tensorflow\lib\site-packages\tensorflow\python\layers\base.py in __call__(self, inputs, *args, **kwargs)
    694 
    695         if not in_deferred_mode:
--> 696           outputs = self.call(inputs, *args, **kwargs)
    697           if outputs is None:
    698             raise ValueError('A layer\'s `call` method should return a Tensor '

c:\users\mntiv\anaconda3\envs\tensorflow\lib\site-packages\tensorflow\python\ops\rnn_cell_impl.py in call(self, inputs, state)
    575 
    576     gate_inputs = math_ops.matmul(
--> 577         array_ops.concat([inputs, h], 1), self._kernel)
    578     gate_inputs = nn_ops.bias_add(gate_inputs, self._bias)
    579 

c:\users\mntiv\anaconda3\envs\tensorflow\lib\site-packages\tensorflow\python\ops\array_ops.py in concat(values, axis, name)
   1173               tensor_shape.scalar())
   1174       return identity(values[0], name=scope)
-> 1175   return gen_array_ops._concat_v2(values=values, axis=axis, name=name)
   1176 
   1177 

c:\users\mntiv\anaconda3\envs\tensorflow\lib\site-packages\tensorflow\python\ops\gen_array_ops.py in _concat_v2(values, axis, name)
    775   if _ctx.in_graph_mode():
    776     _, _, _op = _op_def_lib._apply_op_helper(
--> 777         "ConcatV2", values=values, axis=axis, name=name)
    778     _result = _op.outputs[:]
    779     _inputs_flat = _op.inputs

c:\users\mntiv\anaconda3\envs\tensorflow\lib\site-packages\tensorflow\python\framework\op_def_library.py in _apply_op_helper(self, op_type_name, name, **keywords)
    785         op = g.create_op(op_type_name, inputs, output_types, name=scope,
    786                          input_types=input_types, attrs=attr_protos,
--> 787                          op_def=op_def)
    788       return output_structure, op_def.is_stateful, op
    789 

c:\users\mntiv\anaconda3\envs\tensorflow\lib\site-packages\tensorflow\python\framework\ops.py in create_op(self, op_type, inputs, dtypes, input_types, name, attrs, op_def, compute_shapes, compute_device)
   3271         op_def=op_def)
   3272     self._create_op_helper(ret, compute_shapes=compute_shapes,
-> 3273                            compute_device=compute_device)
   3274     return ret
   3275 

c:\users\mntiv\anaconda3\envs\tensorflow\lib\site-packages\tensorflow\python\framework\ops.py in _create_op_helper(self, op, compute_shapes, compute_device)
   3311     # compute_shapes argument.
   3312     if op._c_op or compute_shapes:  # pylint: disable=protected-access
-> 3313       set_shapes_for_outputs(op)
   3314     # TODO(b/XXXX): move to Operation.__init__ once _USE_C_API flag is removed.
   3315     self._add_op(op)

c:\users\mntiv\anaconda3\envs\tensorflow\lib\site-packages\tensorflow\python\framework\ops.py in set_shapes_for_outputs(op)
   2499     return _set_shapes_for_outputs_c_api(op)
   2500   else:
-> 2501     return _set_shapes_for_outputs(op)
   2502 
   2503 

c:\users\mntiv\anaconda3\envs\tensorflow\lib\site-packages\tensorflow\python\framework\ops.py in _set_shapes_for_outputs(op)
   2472       shape_func = _call_cpp_shape_fn_and_require_op
   2473 
-> 2474   shapes = shape_func(op)
   2475   if shapes is None:
   2476     raise RuntimeError(

c:\users\mntiv\anaconda3\envs\tensorflow\lib\site-packages\tensorflow\python\framework\ops.py in call_with_requiring(op)
   2402 
   2403   def call_with_requiring(op):
-> 2404     return call_cpp_shape_fn(op, require_shape_fn=True)
   2405 
   2406   _call_cpp_shape_fn_and_require_op = call_with_requiring

c:\users\mntiv\anaconda3\envs\tensorflow\lib\site-packages\tensorflow\python\framework\common_shapes.py in call_cpp_shape_fn(op, require_shape_fn)
    625     res = _call_cpp_shape_fn_impl(op, input_tensors_needed,
    626                                   input_tensors_as_shapes_needed,
--> 627                                   require_shape_fn)
    628     if not isinstance(res, dict):
    629       # Handles the case where _call_cpp_shape_fn_impl calls unknown_shape(op).

c:\users\mntiv\anaconda3\envs\tensorflow\lib\site-packages\tensorflow\python\framework\common_shapes.py in _call_cpp_shape_fn_impl(op, input_tensors_needed, input_tensors_as_shapes_needed, require_shape_fn)
    689       missing_shape_fn = True
    690     else:
--> 691       raise ValueError(err.message)
    692 
    693   if missing_shape_fn:

ValueError: Dimension 0 in both shapes must be equal, but are 10 and 3. Shapes are [10] and [3]. for 'myrnn_14/decoder/while/BasicDecoderStep/basic_lstm_cell/concat' (op: 'ConcatV2') with input shapes: [10,8], [3,50], [] and with computed input tensors: input[2] = <1>.

As you can see, there's a dimension issue and I can't figure it out. What am I doing wrong?

Also, I can't really understand the purpose of the helper class (which may possibly be the mistake I'm making here). Any explanation is appreciated.

I found a similar question, "How to use tf.contrib.seq2seq.Helper for non-embedding data?", and made a few changes to my code; it now seems to be working. I'm not sure whether this will still work when I feed in the training data, but here's the code that has worked for me so far. It was tested with TensorFlow 1.6.0.

import tensorflow as tf
import numpy as np
from tensorflow.python.layers import core as layers_core

# Sequence-to-sequence regression on continuous values: an LSTM encoder
# summarises the input sequence and an LSTM decoder, fed the targets through
# a TrainingHelper, is projected back to output_dim by a dense layer.

# --- Problem dimensions ---
input_seq_len = 10   # time steps in each encoder input sequence
input_dim = 8        # features per encoder time step

output_seq_len = 10  # time steps in each target sequence
output_dim = 8       # features per target time step

encoder_units = 50   # LSTM units in the encoder cell
decoder_units = 50   # LSTM units in the decoder cell

batch_size = 3

graph = tf.Graph()
with graph.as_default():
    # Learning rate for the optimizer, fed at run time.
    learning_ = tf.placeholder(tf.float32)

    with tf.variable_scope('Seq2Seq'):

        # Encoder input: [batch, input_seq_len, input_dim]
        enc_input = tf.placeholder(
            tf.float32, [None, input_seq_len, input_dim])

        # Targets (also fed to the decoder by the helper):
        # [batch, output_seq_len, output_dim]
        target = tf.placeholder(
            tf.float32, [None, output_seq_len, output_dim])

        # --- Encoder ---
        encoder_cell = tf.nn.rnn_cell.BasicLSTMCell(encoder_units)

        # Zero state is built for a fixed batch_size even though the
        # placeholders leave the batch dimension open.
        initial_state = encoder_cell.zero_state(batch_size, dtype=tf.float32)

        #   encoder_outputs: [batch_size, seq_size, num_units]
        #   encoder_state:   final encoder state, [batch_size, num_units]
        encoder_outputs, encoder_state = tf.nn.dynamic_rnn(
            encoder_cell, enc_input, initial_state=initial_state)

        # --- Decoder ---
        # Dense layer mapping decoder cell outputs to output_dim features.
        projection = layers_core.Dense(
            output_dim, use_bias=True, name="output_projection")

        # Every sequence in the batch uses the full output_seq_len steps.
        helper = tf.contrib.seq2seq.TrainingHelper(
            target, sequence_length=[output_seq_len] * batch_size)

        decoder_cell = tf.nn.rnn_cell.BasicLSTMCell(decoder_units)

        # The decoder starts from the encoder's final state.
        decoder = tf.contrib.seq2seq.BasicDecoder(
            decoder_cell,
            initial_state=encoder_state,
            helper=helper,
            output_layer=projection)

        outputs, _, _ = tf.contrib.seq2seq.dynamic_decode(decoder=decoder)

        # Half the squared error summed over features, averaged over the
        # time axis and then over the batch.
        diff = tf.square(outputs.rnn_output - target)
        loss = tf.reduce_mean(
            tf.reduce_mean(0.5 * tf.reduce_sum(diff, -1), 1))

        # minimize() yields the training op, despite the variable name.
        optimizer = tf.train.AdamOptimizer(learning_).minimize(loss)

The technical post webpages of this site follow the CC BY-SA 4.0 protocol. If you need to reprint, please indicate the site URL or the original address.Any question please contact:yoyou2525@163.com.

 
粤ICP备18138465号  © 2020-2024 STACKOOM.COM