[英]TensorFlow seq2seq decoder does not work
I'm trying to follow the seq2seq example in https://www.tensorflow.org/tutorials/seq2seq, with the difference that my inputs and outputs are real-valued continuous data rather than words as in the example. 我正在尝试遵循https://www.tensorflow.org/tutorials/seq2seq中的seq2seq示例,但不同之处在于,我的输入和输出是真实的连续值,而不是示例中的单词。
I have two tensors X and Y which are basic examples of the input and output placeholders. 我有两个张量X和Y,它们是输入和输出占位符的基本示例。 Here's the code that I have right now: 这是我现在拥有的代码:
import tensorflow as tf
from tensorflow.contrib import rnn
import numpy as np
from tensorflow.python.layers.core import Dense
# Minimal reproduction: an LSTM encoder feeding an LSTM decoder on real-valued
# (non-embedded) sequences. This is the failing version; the traceback quoted
# after the snippet is raised by the final dynamic_decode call.

# Number of units used for both the encoder and the decoder LSTM cell.
N_HIDDEN = 50
sess = tf.Session()
# Toy data: X is Gaussian noise of shape (3, 10, 8); Y is X plus a small
# Gaussian perturbation of the same shape.
# Presumably the axes are (batch, time, features) -- X_lengths below has three
# entries of 10, which matches that reading.
X = np.random.randn(3, 10, 8)
Y = X + 0.1*np.random.randn(3, 10, 8)
# One length per sequence; every sequence uses all 10 steps.
X_lengths = [10, 10, 10]
# Not referenced anywhere in the lines shown here.
BATCH_SIZE = 3
# AUTO_REUSE lets the snippet be re-run without variable-name collisions.
with tf.variable_scope("myrnn", reuse=tf.AUTO_REUSE) as scope:
# Encoder: run an LSTM over X. dtype is float64 because X is a NumPy float64 array.
encoder_cell = rnn.BasicLSTMCell(N_HIDDEN)
encoder_outputs, encoder_state = tf.nn.dynamic_rnn(
encoder_cell, X, sequence_length=X_lengths, dtype=tf.float64)
# encoder_outputs: shape=(3, 10, 50)
# encoder_state: shape=(3, 50)
decoder_cell = rnn.BasicLSTMCell(N_HIDDEN)
# Helper: supplies Y to the decoder one time step at a time during training.
# NOTE(review): Y is built above with shape (3, 10, 8), yet time_major=True is
# passed here. The traceback below reports the decoder step trying to
# concatenate a [10, 8] input with a [3, 50] state, which is consistent with
# axis 0 of Y being treated as time instead of batch.
helper = tf.contrib.seq2seq.TrainingHelper(
Y, sequence_length=X_lengths, time_major=True)
# Decoder: the decoder cell is initialised with the encoder's final state.
basic_decoder = tf.contrib.seq2seq.BasicDecoder(decoder_cell, helper, encoder_state)
# Dynamic decoding: this is the call that raises the error shown below.
# NOTE(review): the working example later on this page unpacks three return
# values from dynamic_decode -- confirm the arity for the installed version.
outputs, _ = tf.contrib.seq2seq.dynamic_decode(basic_decoder)
But I'm getting the following error on the last line: 但是我在最后一行收到以下错误:
---------------------------------------------------------------------------
InvalidArgumentError Traceback (most recent call last)
c:\users\mntiv\anaconda3\envs\tensorflow\lib\site-packages\tensorflow\python\framework\common_shapes.py in _call_cpp_shape_fn_impl(op, input_tensors_needed, input_tensors_as_shapes_needed, require_shape_fn)
685 graph_def_version, node_def_str, input_shapes, input_tensors,
--> 686 input_tensors_as_shapes, status)
687 except errors.InvalidArgumentError as err:
c:\users\mntiv\anaconda3\envs\tensorflow\lib\site-packages\tensorflow\python\framework\errors_impl.py in __exit__(self, type_arg, value_arg, traceback_arg)
515 compat.as_text(c_api.TF_Message(self.status.status)),
--> 516 c_api.TF_GetCode(self.status.status))
517 # Delete the underlying status object from memory otherwise it stays alive
InvalidArgumentError: Dimension 0 in both shapes must be equal, but are 10 and 3. Shapes are [10] and [3]. for 'myrnn_14/decoder/while/BasicDecoderStep/basic_lstm_cell/concat' (op: 'ConcatV2') with input shapes: [10,8], [3,50], [] and with computed input tensors: input[2] = <1>.
During handling of the above exception, another exception occurred:
ValueError Traceback (most recent call last)
<ipython-input-20-5d7a4b10734c> in <module>()
37
38 # Dynamic decoding
---> 39 outputs, _ = tf.contrib.seq2seq.dynamic_decode(basic_decoder)
c:\users\mntiv\anaconda3\envs\tensorflow\lib\site-packages\tensorflow\contrib\seq2seq\python\ops\decoder.py in dynamic_decode(decoder, output_time_major, impute_finished, maximum_iterations, parallel_iterations, swap_memory, scope)
307 ],
308 parallel_iterations=parallel_iterations,
--> 309 swap_memory=swap_memory)
310
311 final_outputs_ta = res[1]
c:\users\mntiv\anaconda3\envs\tensorflow\lib\site-packages\tensorflow\python\ops\control_flow_ops.py in while_loop(cond, body, loop_vars, shape_invariants, parallel_iterations, back_prop, swap_memory, name, maximum_iterations)
3094 swap_memory=swap_memory)
3095 ops.add_to_collection(ops.GraphKeys.WHILE_CONTEXT, loop_context)
-> 3096 result = loop_context.BuildLoop(cond, body, loop_vars, shape_invariants)
3097 if maximum_iterations is not None:
3098 return result[1]
c:\users\mntiv\anaconda3\envs\tensorflow\lib\site-packages\tensorflow\python\ops\control_flow_ops.py in BuildLoop(self, pred, body, loop_vars, shape_invariants)
2872 self.Enter()
2873 original_body_result, exit_vars = self._BuildLoop(
-> 2874 pred, body, original_loop_vars, loop_vars, shape_invariants)
2875 finally:
2876 self.Exit()
c:\users\mntiv\anaconda3\envs\tensorflow\lib\site-packages\tensorflow\python\ops\control_flow_ops.py in _BuildLoop(self, pred, body, original_loop_vars, loop_vars, shape_invariants)
2812 flat_sequence=vars_for_body_with_tensor_arrays)
2813 pre_summaries = ops.get_collection(ops.GraphKeys._SUMMARY_COLLECTION) # pylint: disable=protected-access
-> 2814 body_result = body(*packed_vars_for_body)
2815 post_summaries = ops.get_collection(ops.GraphKeys._SUMMARY_COLLECTION) # pylint: disable=protected-access
2816 if not nest.is_sequence(body_result):
c:\users\mntiv\anaconda3\envs\tensorflow\lib\site-packages\tensorflow\contrib\seq2seq\python\ops\decoder.py in body(time, outputs_ta, state, inputs, finished, sequence_lengths)
252 """
253 (next_outputs, decoder_state, next_inputs,
--> 254 decoder_finished) = decoder.step(time, inputs, state)
255 if decoder.tracks_own_finished:
256 next_finished = decoder_finished
c:\users\mntiv\anaconda3\envs\tensorflow\lib\site-packages\tensorflow\contrib\seq2seq\python\ops\basic_decoder.py in step(self, time, inputs, state, name)
136 """
137 with ops.name_scope(name, "BasicDecoderStep", (time, inputs, state)):
--> 138 cell_outputs, cell_state = self._cell(inputs, state)
139 if self._output_layer is not None:
140 cell_outputs = self._output_layer(cell_outputs)
c:\users\mntiv\anaconda3\envs\tensorflow\lib\site-packages\tensorflow\python\ops\rnn_cell_impl.py in __call__(self, inputs, state, scope, *args, **kwargs)
294 # method. See the class docstring for more details.
295 return base_layer.Layer.__call__(self, inputs, state, scope=scope,
--> 296 *args, **kwargs)
297
298
c:\users\mntiv\anaconda3\envs\tensorflow\lib\site-packages\tensorflow\python\layers\base.py in __call__(self, inputs, *args, **kwargs)
694
695 if not in_deferred_mode:
--> 696 outputs = self.call(inputs, *args, **kwargs)
697 if outputs is None:
698 raise ValueError('A layer\'s `call` method should return a Tensor '
c:\users\mntiv\anaconda3\envs\tensorflow\lib\site-packages\tensorflow\python\ops\rnn_cell_impl.py in call(self, inputs, state)
575
576 gate_inputs = math_ops.matmul(
--> 577 array_ops.concat([inputs, h], 1), self._kernel)
578 gate_inputs = nn_ops.bias_add(gate_inputs, self._bias)
579
c:\users\mntiv\anaconda3\envs\tensorflow\lib\site-packages\tensorflow\python\ops\array_ops.py in concat(values, axis, name)
1173 tensor_shape.scalar())
1174 return identity(values[0], name=scope)
-> 1175 return gen_array_ops._concat_v2(values=values, axis=axis, name=name)
1176
1177
c:\users\mntiv\anaconda3\envs\tensorflow\lib\site-packages\tensorflow\python\ops\gen_array_ops.py in _concat_v2(values, axis, name)
775 if _ctx.in_graph_mode():
776 _, _, _op = _op_def_lib._apply_op_helper(
--> 777 "ConcatV2", values=values, axis=axis, name=name)
778 _result = _op.outputs[:]
779 _inputs_flat = _op.inputs
c:\users\mntiv\anaconda3\envs\tensorflow\lib\site-packages\tensorflow\python\framework\op_def_library.py in _apply_op_helper(self, op_type_name, name, **keywords)
785 op = g.create_op(op_type_name, inputs, output_types, name=scope,
786 input_types=input_types, attrs=attr_protos,
--> 787 op_def=op_def)
788 return output_structure, op_def.is_stateful, op
789
c:\users\mntiv\anaconda3\envs\tensorflow\lib\site-packages\tensorflow\python\framework\ops.py in create_op(self, op_type, inputs, dtypes, input_types, name, attrs, op_def, compute_shapes, compute_device)
3271 op_def=op_def)
3272 self._create_op_helper(ret, compute_shapes=compute_shapes,
-> 3273 compute_device=compute_device)
3274 return ret
3275
c:\users\mntiv\anaconda3\envs\tensorflow\lib\site-packages\tensorflow\python\framework\ops.py in _create_op_helper(self, op, compute_shapes, compute_device)
3311 # compute_shapes argument.
3312 if op._c_op or compute_shapes: # pylint: disable=protected-access
-> 3313 set_shapes_for_outputs(op)
3314 # TODO(b/XXXX): move to Operation.__init__ once _USE_C_API flag is removed.
3315 self._add_op(op)
c:\users\mntiv\anaconda3\envs\tensorflow\lib\site-packages\tensorflow\python\framework\ops.py in set_shapes_for_outputs(op)
2499 return _set_shapes_for_outputs_c_api(op)
2500 else:
-> 2501 return _set_shapes_for_outputs(op)
2502
2503
c:\users\mntiv\anaconda3\envs\tensorflow\lib\site-packages\tensorflow\python\framework\ops.py in _set_shapes_for_outputs(op)
2472 shape_func = _call_cpp_shape_fn_and_require_op
2473
-> 2474 shapes = shape_func(op)
2475 if shapes is None:
2476 raise RuntimeError(
c:\users\mntiv\anaconda3\envs\tensorflow\lib\site-packages\tensorflow\python\framework\ops.py in call_with_requiring(op)
2402
2403 def call_with_requiring(op):
-> 2404 return call_cpp_shape_fn(op, require_shape_fn=True)
2405
2406 _call_cpp_shape_fn_and_require_op = call_with_requiring
c:\users\mntiv\anaconda3\envs\tensorflow\lib\site-packages\tensorflow\python\framework\common_shapes.py in call_cpp_shape_fn(op, require_shape_fn)
625 res = _call_cpp_shape_fn_impl(op, input_tensors_needed,
626 input_tensors_as_shapes_needed,
--> 627 require_shape_fn)
628 if not isinstance(res, dict):
629 # Handles the case where _call_cpp_shape_fn_impl calls unknown_shape(op).
c:\users\mntiv\anaconda3\envs\tensorflow\lib\site-packages\tensorflow\python\framework\common_shapes.py in _call_cpp_shape_fn_impl(op, input_tensors_needed, input_tensors_as_shapes_needed, require_shape_fn)
689 missing_shape_fn = True
690 else:
--> 691 raise ValueError(err.message)
692
693 if missing_shape_fn:
ValueError: Dimension 0 in both shapes must be equal, but are 10 and 3. Shapes are [10] and [3]. for 'myrnn_14/decoder/while/BasicDecoderStep/basic_lstm_cell/concat' (op: 'ConcatV2') with input shapes: [10,8], [3,50], [] and with computed input tensors: input[2] = <1>.
As you can see, there's a dimension issue and I can't figure it out. 如您所见,存在尺寸问题,我无法解决。 What am I doing wrong? 我究竟做错了什么?
Also, I can't really understand the purpose of the helper class (which may possibly be the mistake I'm making here). 另外,我不能真正理解辅助程序类的目的(这可能是我在这里犯的错误)。 Any explanation is appreciated. 任何解释表示赞赏。
I found a similar question How to use tf.contrib.seq2seq.Helper for non-embedding data? 我发现了一个类似的问题, 如何将tf.contrib.seq2seq.Helper用于非嵌入数据? and made a few changes to my code and it seems that it is working. 并对我的代码进行了一些更改,看来它可以正常工作。 Not sure if this is going to work when I feed the training data. 当我输入训练数据时,不确定这是否行得通。 But here's the code that worked for me so far. 但这是到目前为止对我有用的代码。 This is tested in tensorflow 1.6.0. 这在tensorflow 1.6.0中进行了测试。
import tensorflow as tf
import numpy as np
from tensorflow.python.layers import core as layers_core
# Toy seq2seq regression model for real-valued (non-embedding) sequences:
# an LSTM encoder summarises the input sequence and an LSTM decoder, trained
# with teacher forcing, predicts the target sequence through a dense projection.
input_seq_len = 10   # number of time steps in each input sequence
input_dim = 8        # number of features per input time step
output_seq_len = 10  # number of time steps in each target sequence
output_dim = 8       # number of features per target time step
encoder_units = 50   # number of units in the encoder LSTM cell
decoder_units = 50   # number of units in the decoder LSTM cell
                     # (must equal encoder_units: the encoder state is reused
                     # directly as the decoder's initial state)
batch_size = 3       # nominal batch size; the graph below reads the actual
                     # batch size from the fed data instead of this constant

graph = tf.Graph()
with graph.as_default():

    # Learning rate, fed at run time so it can be changed between steps.
    learning_ = tf.placeholder(tf.float32)

    with tf.variable_scope('Seq2Seq'):

        # Placeholder for encoder input: [batch, input_seq_len, input_dim]
        enc_input = tf.placeholder(
            tf.float32, [None, input_seq_len, input_dim])

        # Placeholder for decoder output - Targets:
        # [batch, output_seq_len, output_dim]
        target = tf.placeholder(
            tf.float32, [None, output_seq_len, output_dim])

        # The placeholders leave the batch dimension open, so take the batch
        # size from the data rather than hard-coding it into the graph.
        dynamic_batch_size = tf.shape(enc_input)[0]

        ### THE ENCODER

        # Build RNN cell
        encoder_cell = tf.nn.rnn_cell.BasicLSTMCell(encoder_units)

        initial_state = encoder_cell.zero_state(
            dynamic_batch_size, dtype=tf.float32)

        # Run Dynamic RNN
        #   encoder_outputs: [batch_size, seq_size, num_units]
        #   encoder_state: LSTM state tuple (c, h), each [batch_size, num_units]
        encoder_outputs, encoder_state = tf.nn.dynamic_rnn(
            encoder_cell, enc_input, initial_state=initial_state)

        ### THE DECODER

        # Simple Dense layer to project from rnn_dim to the desired output_dim
        projection = layers_core.Dense(
            output_dim, use_bias=True, name="output_projection")

        # Teacher forcing: at step t the decoder must see the *previous*
        # target frame, not the frame it is asked to predict. Feeding `target`
        # unshifted would let the network copy its input to its output.
        # An all-zero frame stands in for the missing "previous" frame at t=0.
        go_frame = tf.zeros_like(target[:, :1, :])
        decoder_input = tf.concat([go_frame, target[:, :-1, :]], axis=1)

        # Every sequence in the batch uses the full output_seq_len steps.
        decoder_lengths = tf.fill([dynamic_batch_size], output_seq_len)

        helper = tf.contrib.seq2seq.TrainingHelper(
            decoder_input, sequence_length=decoder_lengths)

        decoder_cell = tf.nn.rnn_cell.BasicLSTMCell(decoder_units)

        decoder = tf.contrib.seq2seq.BasicDecoder(
            decoder_cell,
            initial_state=encoder_state,
            helper=helper,
            output_layer=projection)

        # outputs is a BasicDecoderOutput(rnn_output, sample_id); the final
        # state and final sequence lengths are not needed here.
        outputs, _, _ = tf.contrib.seq2seq.dynamic_decode(decoder=decoder)

    # Half squared error, summed over features, then averaged over time and
    # over the batch.
    # Could be reduced into fewer lines
    diff = tf.square(outputs.rnn_output - target)
    loss = 0.5 * tf.reduce_sum(diff, -1)
    loss = tf.reduce_mean(loss, 1)
    loss = tf.reduce_mean(loss)

    optimizer = tf.train.AdamOptimizer(learning_).minimize(loss)
声明:本站的技术帖子网页,遵循CC BY-SA 4.0协议,如果您需要转载,请注明本站网址或者原文地址。任何问题请咨询:yoyou2525@163.com.