I have the following sequence of layers. Adding additional LSTMs in the mix yields the following error which I cannot really understand.
I'm using python 3.7.3 on Linux Ubuntu x64
GCC 7.4.0
tensorflow-gpu='2.0.0'
# Stacked-LSTM model from the question. Constructing the Sequential model
# raises the ValueError shown in the traceback that follows.
print(x_train_uni.shape) # (299980, 20, 1)
simple_lstm_model = tf.keras.models.Sequential([
# input_shape is the last two dims of the training array, i.e. (20, 1).
tf.keras.layers.LSTM(128, input_shape=x_train_uni.shape[-2:]),
# NOTE(review): the error reports an LSTM receiving shape [None, 128]
# (ndim=2) while expecting ndim=3; 128 matches the units of the layer above,
# so this is presumably the layer that fails.
tf.keras.layers.LSTM(64),
tf.keras.layers.LSTM(32),
tf.keras.layers.Dropout(0.25),
tf.keras.layers.LSTM(16),
tf.keras.layers.LSTM(8),
# Single-unit output layer with tanh activation.
tf.keras.layers.Dense(1, activation='tanh')
])
simple_lstm_model.compile(optimizer='adam', loss='mae')
which yields:
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-32-ba40f416ca84> in <module>
6 tf.keras.layers.LSTM(16),
7 tf.keras.layers.LSTM(8),
----> 8 tf.keras.layers.Dense(1, activation='tanh')
9 ])
10
~/.pyenv/versions/3.7.3/lib/python3.7/site-packages/tensorflow_core/python/training/tracking/base.py in _method_wrapper(self, *args, **kwargs)
455 self._self_setattr_tracking = False # pylint: disable=protected-access
456 try:
--> 457 result = method(self, *args, **kwargs)
458 finally:
459 self._self_setattr_tracking = previous_value # pylint: disable=protected-access
~/.pyenv/versions/3.7.3/lib/python3.7/site-packages/tensorflow_core/python/keras/engine/sequential.py in __init__(self, layers, name)
112 tf_utils.assert_no_legacy_layers(layers)
113 for layer in layers:
--> 114 self.add(layer)
115
116 @property
~/.pyenv/versions/3.7.3/lib/python3.7/site-packages/tensorflow_core/python/training/tracking/base.py in _method_wrapper(self, *args, **kwargs)
455 self._self_setattr_tracking = False # pylint: disable=protected-access
456 try:
--> 457 result = method(self, *args, **kwargs)
458 finally:
459 self._self_setattr_tracking = previous_value # pylint: disable=protected-access
~/.pyenv/versions/3.7.3/lib/python3.7/site-packages/tensorflow_core/python/keras/engine/sequential.py in add(self, layer)
194 # If the model is being built continuously on top of an input layer:
195 # refresh its output.
--> 196 output_tensor = layer(self.outputs[0])
197 if len(nest.flatten(output_tensor)) != 1:
198 raise TypeError('All layers in a Sequential model '
~/.pyenv/versions/3.7.3/lib/python3.7/site-packages/tensorflow_core/python/keras/layers/recurrent.py in __call__(self, inputs, initial_state, constants, **kwargs)
621
622 if initial_state is None and constants is None:
--> 623 return super(RNN, self).__call__(inputs, **kwargs)
624
625 # If any of `initial_state` or `constants` are specified and are Keras
~/.pyenv/versions/3.7.3/lib/python3.7/site-packages/tensorflow_core/python/keras/engine/base_layer.py in __call__(self, inputs, *args, **kwargs)
810 # are casted, not before.
811 input_spec.assert_input_compatibility(self.input_spec, inputs,
--> 812 self.name)
813 graph = backend.get_graph()
814 with graph.as_default(), backend.name_scope(self._name_scope()):
~/.pyenv/versions/3.7.3/lib/python3.7/site-packages/tensorflow_core/python/keras/engine/input_spec.py in assert_input_compatibility(input_spec, inputs, layer_name)
175 'expected ndim=' + str(spec.ndim) + ', found ndim=' +
176 str(ndim) + '. Full shape received: ' +
--> 177 str(x.shape.as_list()))
178 if spec.max_ndim is not None:
179 ndim = x.shape.ndims
ValueError: Input 0 of layer lstm_19 is incompatible with the layer: expected ndim=3, found ndim=2. Full shape received: [None, 128]
If, however, I change the model like so, it actually works:
# Reduced variant from the question: a single LSTM feeding the Dense output
# layer directly. The additional LSTM/Dropout layers are left commented out;
# the asker reports that this version builds without the error.
simple_lstm_model = tf.keras.models.Sequential([
tf.keras.layers.LSTM(128, input_shape=x_train_uni.shape[-2:]),
# tf.keras.layers.LSTM(64),
# tf.keras.layers.LSTM(32),
# tf.keras.layers.Dropout(0.25),
# tf.keras.layers.LSTM(16),
# tf.keras.layers.LSTM(8),
tf.keras.layers.Dense(1, activation='tanh')
])
simple_lstm_model.compile(optimizer='adam', loss='mae')
What is it that I'm missing? Why can't two or more LSTM layers be stacked on top of one another?
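An LSTM layer requires a sequence as its input, i.e. a 3-D tensor (hence "expected ndim=3" in the error). However, the default setting in Keras is to return only the output of the final time step, which is a 2-D tensor with one vector per sample (the `[None, 128]` in the error message), not the whole sequence.
Hence the second LSTM in the proposed architecture is fed a 2-D tensor instead of the required sequence.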
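The solution is to pass the return_sequences=True flag to every LSTM layer whose output is consumed by another LSTM, i.e. all but the last one (see the LSTM arguments in the docs):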
import tensorflow as tf
# Dummy all-zeros input with the same trailing dims (20, 1) as the
# question's training array, so the snippet is self-contained.
x_train_uni = tf.zeros((100, 20, 1))
simple_lstm_model = tf.keras.models.Sequential([
# Every LSTM except the last sets return_sequences=True so that the next
# LSTM in the stack receives a sequence rather than only the final output.
tf.keras.layers.LSTM(128, input_shape=x_train_uni.shape[-2:], return_sequences=True),
tf.keras.layers.LSTM(64, return_sequences=True),
tf.keras.layers.LSTM(32, return_sequences=True),
tf.keras.layers.Dropout(0.25),
tf.keras.layers.LSTM(16, return_sequences=True),
# The last LSTM keeps the default (return_sequences not set); its output
# goes to the Dense layer, not to another LSTM.
tf.keras.layers.LSTM(8),
tf.keras.layers.Dense(1, activation='tanh')
])
simple_lstm_model.compile(optimizer='adam', loss='mae')
The technical post webpages of this site follow the CC BY-SA 4.0 protocol. If you need to reprint, please indicate the site URL or the original address. For any questions, please contact: yoyou2525@163.com.