I am building a chatbot on top of a seq2seq model. There's an error with dimensions. Please help. This is the .py file: https://drive.google.com/drive/u/0/folders/1ljeLb_LYO6qd9azqly31T3luv0oACXfO
import re
lines = open('movie_lines.txt', encoding = 'utf-8', errors = 'ignore').read().split('\n')
converse = open('movie_conversations.txt', encoding = 'utf-8', errors = 'ignore').read().split('\n')
exchn = []
for conver in converse:
    exchn.append(conver.split(' +++$+++ ')[-1][1:-1].replace("'", " ").replace(",", "").split())
diag = {}
for line in lines:
    diag[line.split(' +++$+++ ')[0]] = line.split(' +++$+++ ')[-1]
questions = []
answers = []
for conver in exchn:
    for i in range(len(conver) - 1):
        questions.append(diag[conver[i]])
        answers.append(diag[conver[i+1]])
sorted_ques = []
sorted_ans = []
for i in range(len(questions)):
    if len(questions[i]) < 13:
        sorted_ques.append(questions[i])
        sorted_ans.append(answers[i])
def clean_text(txt):
    txt = txt.lower()
    txt = re.sub(r"i'm", "i am", txt)
    txt = re.sub(r"he's", "he is", txt)
    txt = re.sub(r"she's", "she is", txt)
    txt = re.sub(r"that's", "that is", txt)
    txt = re.sub(r"what's", "what is", txt)
    txt = re.sub(r"where's", "where is", txt)
    txt = re.sub(r"\'ll", " will", txt)
    txt = re.sub(r"\'ve", " have", txt)
    txt = re.sub(r"\'re", " are", txt)
    txt = re.sub(r"\'d", " would", txt)
    txt = re.sub(r"won't", "will not", txt)
    txt = re.sub(r"can't", "can not", txt)
    txt = re.sub(r"[^\w\s]", "", txt)
    return txt
clean_ques = []
clean_ans = []
for line in sorted_ques:
    clean_ques.append(clean_text(line))
for line in sorted_ans:
    clean_ans.append(clean_text(line))
## delete
del(answers, questions, line)
for i in range(len(clean_ans)):
    clean_ans[i] = ' '.join(clean_ans[i].split()[:11])
## trimming
clean_ans=clean_ans[:30000]
clean_ques=clean_ques[:30000]
## delete
### count occurrences ###
word2count = {}
for line in clean_ques:
    for word in line.split():
        if word not in word2count:
            word2count[word] = 1
        else:
            word2count[word] += 1
for line in clean_ans:
    for word in line.split():
        if word not in word2count:
            word2count[word] = 1
        else:
            word2count[word] += 1
## delete
del(word, line)
### remove less frequent ###
thresh = 5
vocab = {}
word_num = 0
for word, count in word2count.items():
    if count >= thresh:
        vocab[word] = word_num
        word_num += 1
## delete
del(word2count, word, count, thresh)
del(word_num)
for i in range(len(clean_ans)):
    clean_ans[i] = '<SOS> ' + clean_ans[i] + ' <EOS>'
tokens = ['<PAD>', '<EOS>', '<OUT>', '<SOS>']
x = len(vocab)
for token in tokens:
    vocab[token] = x
    x += 1
vocab['cameron'] = vocab['<PAD>']
vocab['<PAD>'] = 0
### inv answers dict ###
inv_vocab = {w:v for v, w in vocab.items()}
encoder_inp = []
for line in clean_ques:
    lst = []
    for word in line.split():
        if word not in vocab:
            lst.append(vocab['<OUT>'])
        else:
            lst.append(vocab[word])
    encoder_inp.append(lst)
decoder_inp = []
for line in clean_ans:
    lst = []
    for word in line.split():
        if word not in vocab:
            lst.append(vocab['<OUT>'])
        else:
            lst.append(vocab[word])
    decoder_inp.append(lst)
from tensorflow.keras.preprocessing.sequence import pad_sequences
encoder_inp = pad_sequences(encoder_inp, 13, padding='post', truncating='post')
decoder_inp = pad_sequences(decoder_inp, 13, padding='post', truncating='post')
decoder_final_output = []
for i in decoder_inp:
    decoder_final_output.append(i[1:])
decoder_final_output = pad_sequences(decoder_final_output, 13, padding='post', truncating='post')
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense, Embedding, LSTM, Input
enc_inp = Input(shape=(13, ))
dec_inp = Input(shape=(13, ))
VOCAB_SIZE = len(vocab)
embed = Embedding(VOCAB_SIZE + 1, output_dim=50,
                  input_length=13,
                  trainable=True)
enc_embed = embed(enc_inp)
enc_lstm = LSTM(400, return_sequences=True, return_state=True)
enc_op, h, c = enc_lstm(enc_embed)
enc_states = [h, c]
dec_embed = embed(dec_inp)
dec_lstm = LSTM(400, return_sequences=True, return_state=True)
dec_op, _, _ = dec_lstm(dec_embed, initial_state=enc_states)
dense = Dense(VOCAB_SIZE, activation='softmax')
dense_op = dense(dec_op)
model = Model([enc_inp, dec_inp], dense_op)
model.compile(loss='categorical_crossentropy', metrics=['acc'], optimizer='adam')
model.fit([encoder_inp, decoder_inp], decoder_final_output, epochs=4)
And the error is: Dimensions must be equal, but are 13 and 3027 for 'loss/dense_loss/mul' (op: 'Mul') with input shapes: [?,13], [?,13,3027].
Train on 30000 samples
Epoch 1/4
32/30000 [..............................] - ETA: 9:26
---------------------------------------------------------------------------
InvalidArgumentError Traceback (most recent call last)
~\Anaconda3\envs\Pushkar\lib\site-packages\tensorflow_core\python\framework\ops.py in _create_c_op(graph, node_def, inputs, control_inputs)
1618 try:
-> 1619 c_op = c_api.TF_FinishOperation(op_desc)
1620 except errors.InvalidArgumentError as e:
InvalidArgumentError: Dimensions must be equal, but are 13 and 3027 for 'loss/dense_loss/mul' (op: 'Mul') with input shapes: [?,13], [?,13,3027].
During handling of the above exception, another exception occurred:
ValueError Traceback (most recent call last)
<ipython-input-5-75c3ca63e6a3> in <module>
201 model.compile(loss='categorical_crossentropy',metrics=['acc'],optimizer='adam')
202
--> 203 model.fit([encoder_inp, decoder_inp],decoder_final_output,epochs=4)
~\Anaconda3\envs\Pushkar\lib\site-packages\tensorflow_core\python\keras\engine\training.py in fit(self, x, y, batch_size, epochs, verbose, callbacks, validation_split, validation_data, shuffle, class_weight, sample_weight, initial_epoch, steps_per_epoch, validation_steps, validation_freq, max_queue_size, workers, use_multiprocessing, **kwargs)
817 max_queue_size=max_queue_size,
818 workers=workers,
--> 819 use_multiprocessing=use_multiprocessing)
820
821 def evaluate(self,
~\Anaconda3\envs\Pushkar\lib\site-packages\tensorflow_core\python\keras\engine\training_v2.py in fit(self, model, x, y, batch_size, epochs, verbose, callbacks, validation_split, validation_data, shuffle, class_weight, sample_weight, initial_epoch, steps_per_epoch, validation_steps, validation_freq, max_queue_size, workers, use_multiprocessing, **kwargs)
340 mode=ModeKeys.TRAIN,
341 training_context=training_context,
--> 342 total_epochs=epochs)
343 cbks.make_logs(model, epoch_logs, training_result, ModeKeys.TRAIN)
344
~\Anaconda3\envs\Pushkar\lib\site-packages\tensorflow_core\python\keras\engine\training_v2.py in run_one_epoch(model, iterator, execution_function, dataset_size, batch_size, strategy, steps_per_epoch, num_samples, mode, training_context, total_epochs)
126 step=step, mode=mode, size=current_batch_size) as batch_logs:
127 try:
--> 128 batch_outs = execution_function(iterator)
129 except (StopIteration, errors.OutOfRangeError):
130 # TODO(kaftan): File bug about tf function and errors.OutOfRangeError?
~\Anaconda3\envs\Pushkar\lib\site-packages\tensorflow_core\python\keras\engine\training_v2_utils.py in execution_function(input_fn)
96 # `numpy` translates Tensors to values in Eager mode.
97 return nest.map_structure(_non_none_constant_value,
---> 98 distributed_function(input_fn))
99
100 return execution_function
~\Anaconda3\envs\Pushkar\lib\site-packages\tensorflow_core\python\eager\def_function.py in __call__(self, *args, **kwds)
566 xla_context.Exit()
567 else:
--> 568 result = self._call(*args, **kwds)
569
570 if tracing_count == self._get_tracing_count():
~\Anaconda3\envs\Pushkar\lib\site-packages\tensorflow_core\python\eager\def_function.py in _call(self, *args, **kwds)
613 # This is the first call of __call__, so we have to initialize.
614 initializers = []
--> 615 self._initialize(args, kwds, add_initializers_to=initializers)
616 finally:
617 # At this point we know that the initialization is complete (or less
~\Anaconda3\envs\Pushkar\lib\site-packages\tensorflow_core\python\eager\def_function.py in _initialize(self, args, kwds, add_initializers_to)
495 self._concrete_stateful_fn = (
496 self._stateful_fn._get_concrete_function_internal_garbage_collected( # pylint: disable=protected-access
--> 497 *args, **kwds))
498
499 def invalid_creator_scope(*unused_args, **unused_kwds):
~\Anaconda3\envs\Pushkar\lib\site-packages\tensorflow_core\python\eager\function.py in _get_concrete_function_internal_garbage_collected(self, *args, **kwargs)
2387 args, kwargs = None, None
2388 with self._lock:
-> 2389 graph_function, _, _ = self._maybe_define_function(args, kwargs)
2390 return graph_function
2391
~\Anaconda3\envs\Pushkar\lib\site-packages\tensorflow_core\python\eager\function.py in _maybe_define_function(self, args, kwargs)
2701
2702 self._function_cache.missed.add(call_context_key)
-> 2703 graph_function = self._create_graph_function(args, kwargs)
2704 self._function_cache.primary[cache_key] = graph_function
2705 return graph_function, args, kwargs
~\Anaconda3\envs\Pushkar\lib\site-packages\tensorflow_core\python\eager\function.py in _create_graph_function(self, args, kwargs, override_flat_arg_shapes)
2591 arg_names=arg_names,
2592 override_flat_arg_shapes=override_flat_arg_shapes,
-> 2593 capture_by_value=self._capture_by_value),
2594 self._function_attributes,
2595 # Tell the ConcreteFunction to clean up its graph once it goes out of
~\Anaconda3\envs\Pushkar\lib\site-packages\tensorflow_core\python\framework\func_graph.py in func_graph_from_py_func(name, python_func, args, kwargs, signature, func_graph, autograph, autograph_options, add_control_dependencies, arg_names, op_return_value, collections, capture_by_value, override_flat_arg_shapes)
976 converted_func)
977
--> 978 func_outputs = python_func(*func_args, **func_kwargs)
979
980 # invariant: `func_outputs` contains only Tensors, CompositeTensors,
~\Anaconda3\envs\Pushkar\lib\site-packages\tensorflow_core\python\eager\def_function.py in wrapped_fn(*args, **kwds)
437 # __wrapped__ allows AutoGraph to swap in a converted function. We give
438 # the function a weak reference to itself to avoid a reference cycle.
--> 439 return weak_wrapped_fn().__wrapped__(*args, **kwds)
440 weak_wrapped_fn = weakref.ref(wrapped_fn)
441
~\Anaconda3\envs\Pushkar\lib\site-packages\tensorflow_core\python\keras\engine\training_v2_utils.py in distributed_function(input_iterator)
83 args = _prepare_feed_values(model, input_iterator, mode, strategy)
84 outputs = strategy.experimental_run_v2(
---> 85 per_replica_function, args=args)
86 # Out of PerReplica outputs reduce or pick values to return.
87 all_outputs = dist_utils.unwrap_output_dict(
~\Anaconda3\envs\Pushkar\lib\site-packages\tensorflow_core\python\distribute\distribute_lib.py in experimental_run_v2(self, fn, args, kwargs)
761 fn = autograph.tf_convert(fn, ag_ctx.control_status_ctx(),
762 convert_by_default=False)
--> 763 return self._extended.call_for_each_replica(fn, args=args, kwargs=kwargs)
764
765 def reduce(self, reduce_op, value, axis):
~\Anaconda3\envs\Pushkar\lib\site-packages\tensorflow_core\python\distribute\distribute_lib.py in call_for_each_replica(self, fn, args, kwargs)
1817 kwargs = {}
1818 with self._container_strategy().scope():
-> 1819 return self._call_for_each_replica(fn, args, kwargs)
1820
1821 def _call_for_each_replica(self, fn, args, kwargs):
~\Anaconda3\envs\Pushkar\lib\site-packages\tensorflow_core\python\distribute\distribute_lib.py in _call_for_each_replica(self, fn, args, kwargs)
2162 self._container_strategy(),
2163 replica_id_in_sync_group=constant_op.constant(0, dtypes.int32)):
-> 2164 return fn(*args, **kwargs)
2165
2166 def _reduce_to(self, reduce_op, value, destinations):
~\Anaconda3\envs\Pushkar\lib\site-packages\tensorflow_core\python\autograph\impl\api.py in wrapper(*args, **kwargs)
290 def wrapper(*args, **kwargs):
291 with ag_ctx.ControlStatusCtx(status=ag_ctx.Status.DISABLED):
--> 292 return func(*args, **kwargs)
293
294 if inspect.isfunction(func) or inspect.ismethod(func):
~\Anaconda3\envs\Pushkar\lib\site-packages\tensorflow_core\python\keras\engine\training_v2_utils.py in train_on_batch(model, x, y, sample_weight, class_weight, reset_metrics, standalone)
431 y,
432 sample_weights=sample_weights,
--> 433 output_loss_metrics=model._output_loss_metrics)
434
435 if reset_metrics:
~\Anaconda3\envs\Pushkar\lib\site-packages\tensorflow_core\python\keras\engine\training_eager.py in train_on_batch(model, inputs, targets, sample_weights, output_loss_metrics)
310 sample_weights=sample_weights,
311 training=True,
--> 312 output_loss_metrics=output_loss_metrics))
313 if not isinstance(outs, list):
314 outs = [outs]
~\Anaconda3\envs\Pushkar\lib\site-packages\tensorflow_core\python\keras\engine\training_eager.py in _process_single_batch(model, inputs, targets, output_loss_metrics, sample_weights, training)
251 output_loss_metrics=output_loss_metrics,
252 sample_weights=sample_weights,
--> 253 training=training))
254 if total_loss is None:
255 raise ValueError('The model cannot be run '
~\Anaconda3\envs\Pushkar\lib\site-packages\tensorflow_core\python\keras\engine\training_eager.py in _model_loss(model, inputs, targets, output_loss_metrics, sample_weights, training)
165
166 if hasattr(loss_fn, 'reduction'):
--> 167 per_sample_losses = loss_fn.call(targets[i], outs[i])
168 weighted_losses = losses_utils.compute_weighted_loss(
169 per_sample_losses,
~\Anaconda3\envs\Pushkar\lib\site-packages\tensorflow_core\python\keras\losses.py in call(self, y_true, y_pred)
219 y_pred, y_true = tf_losses_util.squeeze_or_expand_dimensions(
220 y_pred, y_true)
--> 221 return self.fn(y_true, y_pred, **self._fn_kwargs)
222
223 def get_config(self):
~\Anaconda3\envs\Pushkar\lib\site-packages\tensorflow_core\python\keras\losses.py in categorical_crossentropy(y_true, y_pred, from_logits, label_smoothing)
969 y_true = smart_cond.smart_cond(label_smoothing,
970 _smooth_labels, lambda: y_true)
--> 971 return K.categorical_crossentropy(y_true, y_pred, from_logits=from_logits)
972
973
~\Anaconda3\envs\Pushkar\lib\site-packages\tensorflow_core\python\keras\backend.py in categorical_crossentropy(target, output, from_logits, axis)
4493 epsilon_ = _constant_to_tensor(epsilon(), output.dtype.base_dtype)
4494 output = clip_ops.clip_by_value(output, epsilon_, 1. - epsilon_)
-> 4495 return -math_ops.reduce_sum(target * math_ops.log(output), axis)
4496 else:
4497 # When softmax activation function is used for output operation, we
~\Anaconda3\envs\Pushkar\lib\site-packages\tensorflow_core\python\ops\math_ops.py in binary_op_wrapper(x, y)
900 with ops.name_scope(None, op_name, [x, y]) as name:
901 if isinstance(x, ops.Tensor) and isinstance(y, ops.Tensor):
--> 902 return func(x, y, name=name)
903 elif not isinstance(y, sparse_tensor.SparseTensor):
904 try:
~\Anaconda3\envs\Pushkar\lib\site-packages\tensorflow_core\python\ops\math_ops.py in _mul_dispatch(x, y, name)
1199 is_tensor_y = isinstance(y, ops.Tensor)
1200 if is_tensor_y:
-> 1201 return gen_math_ops.mul(x, y, name=name)
1202 else:
1203 assert isinstance(y, sparse_tensor.SparseTensor) # Case: Dense * Sparse.
~\Anaconda3\envs\Pushkar\lib\site-packages\tensorflow_core\python\ops\gen_math_ops.py in mul(x, y, name)
6122 # Add nodes to the TensorFlow graph.
6123 _, _, _op, _outputs = _op_def_library._apply_op_helper(
-> 6124 "Mul", x=x, y=y, name=name)
6125 _result = _outputs[:]
6126 if _execute.must_record_gradient():
~\Anaconda3\envs\Pushkar\lib\site-packages\tensorflow_core\python\framework\op_def_library.py in _apply_op_helper(op_type_name, name, **keywords)
740 op = g._create_op_internal(op_type_name, inputs, dtypes=None,
741 name=scope, input_types=input_types,
--> 742 attrs=attr_protos, op_def=op_def)
743
744 # `outputs` is returned as a separate return value so that the output
~\Anaconda3\envs\Pushkar\lib\site-packages\tensorflow_core\python\framework\func_graph.py in _create_op_internal(self, op_type, inputs, dtypes, input_types, name, attrs, op_def, compute_device)
593 return super(FuncGraph, self)._create_op_internal( # pylint: disable=protected-access
594 op_type, inputs, dtypes, input_types, name, attrs, op_def,
--> 595 compute_device)
596
597 def capture(self, tensor, name=None, shape=None):
~\Anaconda3\envs\Pushkar\lib\site-packages\tensorflow_core\python\framework\ops.py in _create_op_internal(self, op_type, inputs, dtypes, input_types, name, attrs, op_def, compute_device)
3320 input_types=input_types,
3321 original_op=self._default_original_op,
-> 3322 op_def=op_def)
3323 self._create_op_helper(ret, compute_device=compute_device)
3324 return ret
~\Anaconda3\envs\Pushkar\lib\site-packages\tensorflow_core\python\framework\ops.py in __init__(self, node_def, g, inputs, output_types, control_inputs, input_types, original_op, op_def)
1784 op_def, inputs, node_def.attr)
1785 self._c_op = _create_c_op(self._graph, node_def, grouped_inputs,
-> 1786 control_input_ops)
1787 name = compat.as_str(node_def.name)
1788 # pylint: enable=protected-access
~\Anaconda3\envs\Pushkar\lib\site-packages\tensorflow_core\python\framework\ops.py in _create_c_op(graph, node_def, inputs, control_inputs)
1620 except errors.InvalidArgumentError as e:
1621 # Convert to ValueError for backwards compatibility.
-> 1622 raise ValueError(str(e))
1623
1624 return c_op
ValueError: Dimensions must be equal, but are 13 and 3027 for 'loss/dense_loss/mul' (op: 'Mul') with input shapes: [?,13], [?,13,3027].
I think the issue is in the input layers, specifically these two lines:
enc_inp = Input(shape=(13, ))
dec_inp = Input(shape=(13, ))
The reason is that the Embedding layer itself acts as the input layer, and what you should pass in is a vector with the dimension of the vocabulary (I assume it is 3027), all zeros except a one (1) at the position of each word that is present; then take the Input layers out of the model.
I think that resolves the incompatibility between the dimensions.
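For illustration only, here is a minimal sketch of what building such vocabulary-sized one-hot vectors could look like with tf.keras.utils.to_categorical. The batch, the sequence length of 13, and VOCAB_SIZE = 3027 are assumptions taken from your code and the error message, not something I have run against your data:

import numpy as np
from tensorflow.keras.utils import to_categorical

VOCAB_SIZE = 3027  # assumed from the error message

# hypothetical batch of 2 padded id sequences of length 13
seqs = np.array([[5, 42, 7] + [0] * 10,
                 [1, 2, 3] + [0] * 10])

# to_categorical maps every integer id to a one-hot vector of length
# VOCAB_SIZE, turning shape (2, 13) into (2, 13, 3027)
one_hot = to_categorical(seqs, num_classes=VOCAB_SIZE)
print(one_hot.shape)  # (2, 13, 3027)

The same call accepts an entire padded array in one go; just be aware that a one-hot array of shape (30000, 13, 3027) is large, so memory use is something to watch.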