As a part of TPS July challenge , I was trying to implement a custom tensorflow model based on Recurrent neural networks
Idea: I wanted to include an RNN, which predicts values at current iteration, based the model's prediction at previous iteration. So, I implemented a custom Model, which saves output of current iteration, to be fed to the model's LSTM layer in the next Iteration.
However, if I call the model's fit method, I got the following error
InvalidArgumentError Traceback (most recent call last)
<ipython-input-29-0457da000b62> in <module>
----> 1 model.fit(train_x,train_labels,epochs=100)
/opt/conda/lib/python3.7/site-packages/tensorflow/python/keras/engine/training.py in fit(self, x, y, batch_size, epochs, verbose, callbacks, validation_split, validation_data, shuffle, class_weight, sample_weight, initial_epoch, steps_per_epoch, validation_steps, validation_batch_size, validation_freq, max_queue_size, workers, use_multiprocessing)
1098 _r=1):
1099 callbacks.on_train_batch_begin(step)
-> 1100 tmp_logs = self.train_function(iterator)
1101 if data_handler.should_sync:
1102 context.async_wait()
/opt/conda/lib/python3.7/site-packages/tensorflow/python/eager/def_function.py in __call__(self, *args, **kwds)
826 tracing_count = self.experimental_get_tracing_count()
827 with trace.Trace(self._name) as tm:
--> 828 result = self._call(*args, **kwds)
829 compiler = "xla" if self._experimental_compile else "nonXla"
830 new_tracing_count = self.experimental_get_tracing_count()
/opt/conda/lib/python3.7/site-packages/tensorflow/python/eager/def_function.py in _call(self, *args, **kwds)
886 # Lifting succeeded, so variables are initialized and we can run the
887 # stateless function.
--> 888 return self._stateless_fn(*args, **kwds)
889 else:
890 _, _, _, filtered_flat_args = \
/opt/conda/lib/python3.7/site-packages/tensorflow/python/eager/function.py in __call__(self, *args, **kwargs)
2941 filtered_flat_args) = self._maybe_define_function(args, kwargs)
2942 return graph_function._call_flat(
-> 2943 filtered_flat_args, captured_inputs=graph_function.captured_inputs) # pylint: disable=protected-access
2944
2945 @property
/opt/conda/lib/python3.7/site-packages/tensorflow/python/eager/function.py in _call_flat(self, args, captured_inputs, cancellation_manager)
1917 # No tape is watching; skip to running the function.
1918 return self._build_call_outputs(self._inference_function.call(
-> 1919 ctx, args, cancellation_manager=cancellation_manager))
1920 forward_backward = self._select_forward_and_backward_functions(
1921 args,
/opt/conda/lib/python3.7/site-packages/tensorflow/python/eager/function.py in call(self, ctx, args, cancellation_manager)
558 inputs=args,
559 attrs=attrs,
--> 560 ctx=ctx)
561 else:
562 outputs = execute.execute_with_cancellation(
/opt/conda/lib/python3.7/site-packages/tensorflow/python/eager/execute.py in quick_execute(op_name, num_outputs, inputs, attrs, ctx, name)
58 ctx.ensure_initialized()
59 tensors = pywrap_tfe.TFE_Py_Execute(ctx._handle, device_name, op_name,
---> 60 inputs, attrs, num_outputs)
61 except core._NotOkStatusException as e:
62 if name is not None:
InvalidArgumentError: Can not squeeze dim[0], expected a dimension of 1, got 32
[[{{node lstm_model/weight_normalization_15/cond/else/_1/lstm_model/weight_normalization_15/cond/data_dep_init/moments/Squeeze}}]] [Op:__inference_train_function_11775]
Function call stack:
train_function
Is my method of utilizing the model correct? If not, what would be a better implementation of my Idea?
My Code:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
import tensorflow_addons as tfa
train_labels = train_data[['target_carbon_monoxide','target_benzene','target_nitrogen_oxides']].copy()
train_x = train_data.drop(['target_carbon_monoxide','target_benzene','target_nitrogen_oxides','date_time'],axis=1)
train_x.head()
train_labels = np.asarray(train_labels).reshape(-1,1,3)
curroutput = tf.Variable(shape=(1,3),initial_value=[[0.0,0.0,0.0]],dtype=tf.float32)
class CompleteModel(keras.Model):
def train_step(self, data):
x,y = data
# x = tf.reshape(self.curroutput,shape=(1,1,3))
with tf.GradientTape() as tape:
y_pred = self(x, training=True)
loss = self.compiled_loss(y_pred,y, regularization_losses=self.losses)
global curroutput
curroutput.assign(y_pred)
trainable_vars = self.trainable_variables
gradients = tape.gradient(loss, trainable_vars)
self.optimizer.apply_gradients(zip(gradients, trainable_vars))
self.compiled_metrics.update_state(y, y_pred)
return {m.name: m.result() for m in self.metrics}
class RNNInputLayer(keras.layers.Layer):
def __init__(self):
super(RNNInputLayer,self).__init__()
def call(self,inputs):
global curroutput
return tf.reshape(curroutput,shape=(1,1,3))
def make_model():
input_layer = layers.Input(shape=8,batch_size=1)
dense_in = tfa.layers.WeightNormalization(layers.Dense(16,activation='selu'))(input_layer)
dense_in2 = tfa.layers.WeightNormalization(layers.Dense(32,activation='selu'))(dense_in)
dense_out = tfa.layers.WeightNormalization(layers.Dense(8,activation='selu'))(dense_in)
rnn_input = RNNInputLayer()(input_layer)
lstm_layer = layers.LSTM(units=16,input_shape=(1,3))(rnn_input)
lstm_dense = tfa.layers.WeightNormalization(layers.Dense(16,activation='selu'))(lstm_layer)
finalconcat = layers.Concatenate()([dense_out,lstm_dense])
final_dense = tfa.layers.WeightNormalization(layers.Dense(16,activation='selu'))(finalconcat)
output_layer = layers.Dense(3)(final_dense)
model = CompleteModel(inputs=input_layer,outputs=output_layer,name='lstm_model')
return model
model = make_model()
model.compile(loss=tf.keras.losses.MeanSquaredLogarithmicError(),optimizer='Adam')
model.fit(train_x,train_labels,epochs=100) #error
As Priya highlighted, the error says dim[0] which refers to the batch dimension so this answer might not solve your error but it will definitely help you implement your model .
Within your make_model()
function, try changing dense_out
to
dense_out = tfa.layers.WeightNormalization(layers.Dense(8,activation='selu'))(dense_in2)
Within the make_model()
function, your dense_in2
layer is not used as the input to your dense_output
layer. You probably wanted to use this layer in your model and forgot to add the 2 to the end of the variable name. Additionally, I would assume that the dense_output
layer is looking for an input dimension of 32 neurons which MIGHT the the cause of the error (though this may have been bypassed completely by the way the WeightNormalization layer object is implemented).
The technical post webpages of this site follow the CC BY-SA 4.0 protocol. If you need to reprint, please indicate the site URL or the original address.Any question please contact:yoyou2525@163.com.