
ValueError: Dimensions must be equal, but are 13 and 3076 for 'loss/dense_1_loss/mul' (op: 'Mul') with input shapes: [?,13], [?,13,3076]

I am building a chatbot on top of a seq2seq model. There's an error with the dimensions. Please help. This is the .py file: https://drive.google.com/drive/u/0/folders/1ljeLb_LYO6qd9azqly31T3luv0oACXfO

import re


# Load the Cornell Movie-Dialogs Corpus files
lines = open('movie_lines.txt', encoding='utf-8', errors='ignore').read().split('\n')
converse = open('movie_conversations.txt', encoding='utf-8', errors='ignore').read().split('\n')



# Extract the list of line IDs that make up each conversation
exchn = []
for conver in converse:
    exchn.append(conver.split(' +++$+++ ')[-1][1:-1].replace("'", " ").replace(",", "").split())

# Map each line ID to its utterance text
diag = {}
for line in lines:
    diag[line.split(' +++$+++ ')[0]] = line.split(' +++$+++ ')[-1]

questions = []
answers = []

# Pair each line with the one that follows it as (question, answer)
for conver in exchn:
    for i in range(len(conver) - 1):
        questions.append(diag[conver[i]])
        answers.append(diag[conver[i+1]])

# Keep only the pairs whose question is under 13 characters long
sorted_ques = []
sorted_ans = []
for i in range(len(questions)):
    if len(questions[i]) < 13:
        sorted_ques.append(questions[i])
        sorted_ans.append(answers[i])



def clean_text(txt):
    # Lowercase, expand common contractions, and strip punctuation
    txt = txt.lower()
    txt = re.sub(r"i'm", "i am", txt)
    txt = re.sub(r"he's", "he is", txt)
    txt = re.sub(r"she's", "she is", txt)
    txt = re.sub(r"that's", "that is", txt)
    txt = re.sub(r"what's", "what is", txt)
    txt = re.sub(r"where's", "where is", txt)
    txt = re.sub(r"\'ll", " will", txt)
    txt = re.sub(r"\'ve", " have", txt)
    txt = re.sub(r"\'re", " are", txt)
    txt = re.sub(r"\'d", " would", txt)
    txt = re.sub(r"won't", "will not", txt)
    txt = re.sub(r"can't", "can not", txt)
    txt = re.sub(r"[^\w\s]", "", txt)
    return txt
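# Example: clean_text("I'm here, what's up?") -> "i am here what is up"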

clean_ques = []
clean_ans = []

for line in sorted_ques:
    clean_ques.append(clean_text(line))
        
for line in sorted_ans:
    clean_ans.append(clean_text(line))



## delete
del(answers, questions, line)



# Truncate each answer to 11 words so '<SOS> ... <EOS>' fits in 13 tokens
for i in range(len(clean_ans)):
    clean_ans[i] = ' '.join(clean_ans[i].split()[:11])



## trim to the first 30000 pairs
clean_ans = clean_ans[:30000]
clean_ques = clean_ques[:30000]


###  count occurrences ###
word2count = {}

for line in clean_ques:
    for word in line.split():
        if word not in word2count:
            word2count[word] = 1
        else:
            word2count[word] += 1
for line in clean_ans:
    for word in line.split():
        if word not in word2count:
            word2count[word] = 1
        else:
            word2count[word] += 1

## delete
del(word, line)


###  remove infrequent words ###
thresh = 5

vocab = {}
word_num = 0
for word, count in word2count.items():
    if count >= thresh:
        vocab[word] = word_num
        word_num += 1
        
## delete
del(word2count, word, count, thresh)       
del(word_num)        



for i in range(len(clean_ans)):
    clean_ans[i] = '<SOS> ' + clean_ans[i] + ' <EOS>'

tokens = ['<PAD>', '<EOS>', '<OUT>', '<SOS>']
x = len(vocab)
for token in tokens:
    vocab[token] = x
    x += 1
    
    

# Give '<PAD>' id 0; 'cameron' takes over the old '<PAD>' id
vocab['cameron'] = vocab['<PAD>']
vocab['<PAD>'] = 0


### inverse vocab dict (id -> word) ###
inv_vocab = {w: v for v, w in vocab.items()}




# Integer-encode the questions, mapping out-of-vocabulary words to <OUT>
encoder_inp = []
for line in clean_ques:
    lst = []
    for word in line.split():
        if word not in vocab:
            lst.append(vocab['<OUT>'])
        else:
            lst.append(vocab[word])
    encoder_inp.append(lst)

# Integer-encode the answers the same way
decoder_inp = []
for line in clean_ans:
    lst = []
    for word in line.split():
        if word not in vocab:
            lst.append(vocab['<OUT>'])
        else:
            lst.append(vocab[word])
    decoder_inp.append(lst)

from tensorflow.keras.preprocessing.sequence import pad_sequences
encoder_inp = pad_sequences(encoder_inp, maxlen=13, padding='post', truncating='post')
decoder_inp = pad_sequences(decoder_inp, maxlen=13, padding='post', truncating='post')

# Targets: the decoder inputs shifted left by one step (the leading <SOS> drops out)
decoder_final_output = []
for i in decoder_inp:
    decoder_final_output.append(i[1:])

decoder_final_output = pad_sequences(decoder_final_output, maxlen=13, padding='post', truncating='post')

from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense, Embedding, LSTM, Input


enc_inp = Input(shape=(13, ))
dec_inp = Input(shape=(13, ))


VOCAB_SIZE = len(vocab)
# Shared embedding layer for both encoder and decoder inputs
embed = Embedding(VOCAB_SIZE+1, output_dim=50,
                  input_length=13,
                  trainable=True)


# Encoder: its final LSTM states will initialize the decoder
enc_embed = embed(enc_inp)
enc_lstm = LSTM(400, return_sequences=True, return_state=True)
enc_op, h, c = enc_lstm(enc_embed)
enc_states = [h, c]



# Decoder: reads the target sequence, initialized from the encoder states
dec_embed = embed(dec_inp)
dec_lstm = LSTM(400, return_sequences=True, return_state=True)
dec_op, _, _ = dec_lstm(dec_embed, initial_state=enc_states)

dense = Dense(VOCAB_SIZE, activation='softmax')

dense_op = dense(dec_op)

model = Model([enc_inp, dec_inp], dense_op)




model.compile(loss='categorical_crossentropy', metrics=['acc'], optimizer='adam')
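# Shape check (sketch): the targets are integer word ids of shape (30000, 13),
# while the model's softmax output has shape (None, 13, VOCAB_SIZE).
print(decoder_final_output.shape)
print(model.output_shape)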

model.fit([encoder_inp, decoder_inp], decoder_final_output, epochs=4)

And the error is: Dimensions must be equal, but are 13 and 3027 for 'loss/dense_loss/mul' (op: 'Mul') with input shapes: [?,13], [?,13,3027].


Train on 30000 samples
Epoch 1/4
   32/30000 [..............................] - ETA: 9:26
---------------------------------------------------------------------------
InvalidArgumentError                      Traceback (most recent call last)
~\Anaconda3\envs\Pushkar\lib\site-packages\tensorflow_core\python\framework\ops.py in _create_c_op(graph, node_def, inputs, control_inputs)
   1618   try:
-> 1619     c_op = c_api.TF_FinishOperation(op_desc)
   1620   except errors.InvalidArgumentError as e:

InvalidArgumentError: Dimensions must be equal, but are 13 and 3027 for 'loss/dense_loss/mul' (op: 'Mul') with input shapes: [?,13], [?,13,3027].

During handling of the above exception, another exception occurred:

ValueError                                Traceback (most recent call last)
<ipython-input-5-75c3ca63e6a3> in <module>
    201 model.compile(loss='categorical_crossentropy',metrics=['acc'],optimizer='adam')
    202 
--> 203 model.fit([encoder_inp, decoder_inp],decoder_final_output,epochs=4)

~\Anaconda3\envs\Pushkar\lib\site-packages\tensorflow_core\python\keras\engine\training.py in fit(self, x, y, batch_size, epochs, verbose, callbacks, validation_split, validation_data, shuffle, class_weight, sample_weight, initial_epoch, steps_per_epoch, validation_steps, validation_freq, max_queue_size, workers, use_multiprocessing, **kwargs)
    817         max_queue_size=max_queue_size,
    818         workers=workers,
--> 819         use_multiprocessing=use_multiprocessing)
    820 
    821   def evaluate(self,

~\Anaconda3\envs\Pushkar\lib\site-packages\tensorflow_core\python\keras\engine\training_v2.py in fit(self, model, x, y, batch_size, epochs, verbose, callbacks, validation_split, validation_data, shuffle, class_weight, sample_weight, initial_epoch, steps_per_epoch, validation_steps, validation_freq, max_queue_size, workers, use_multiprocessing, **kwargs)
    340                 mode=ModeKeys.TRAIN,
    341                 training_context=training_context,
--> 342                 total_epochs=epochs)
    343             cbks.make_logs(model, epoch_logs, training_result, ModeKeys.TRAIN)
    344 

~\Anaconda3\envs\Pushkar\lib\site-packages\tensorflow_core\python\keras\engine\training_v2.py in run_one_epoch(model, iterator, execution_function, dataset_size, batch_size, strategy, steps_per_epoch, num_samples, mode, training_context, total_epochs)
    126         step=step, mode=mode, size=current_batch_size) as batch_logs:
    127       try:
--> 128         batch_outs = execution_function(iterator)
    129       except (StopIteration, errors.OutOfRangeError):
    130         # TODO(kaftan): File bug about tf function and errors.OutOfRangeError?

~\Anaconda3\envs\Pushkar\lib\site-packages\tensorflow_core\python\keras\engine\training_v2_utils.py in execution_function(input_fn)
     96     # `numpy` translates Tensors to values in Eager mode.
     97     return nest.map_structure(_non_none_constant_value,
---> 98                               distributed_function(input_fn))
     99 
    100   return execution_function

~\Anaconda3\envs\Pushkar\lib\site-packages\tensorflow_core\python\eager\def_function.py in __call__(self, *args, **kwds)
    566         xla_context.Exit()
    567     else:
--> 568       result = self._call(*args, **kwds)
    569 
    570     if tracing_count == self._get_tracing_count():

~\Anaconda3\envs\Pushkar\lib\site-packages\tensorflow_core\python\eager\def_function.py in _call(self, *args, **kwds)
    613       # This is the first call of __call__, so we have to initialize.
    614       initializers = []
--> 615       self._initialize(args, kwds, add_initializers_to=initializers)
    616     finally:
    617       # At this point we know that the initialization is complete (or less

~\Anaconda3\envs\Pushkar\lib\site-packages\tensorflow_core\python\eager\def_function.py in _initialize(self, args, kwds, add_initializers_to)
    495     self._concrete_stateful_fn = (
    496         self._stateful_fn._get_concrete_function_internal_garbage_collected(  # pylint: disable=protected-access
--> 497             *args, **kwds))
    498 
    499     def invalid_creator_scope(*unused_args, **unused_kwds):

~\Anaconda3\envs\Pushkar\lib\site-packages\tensorflow_core\python\eager\function.py in _get_concrete_function_internal_garbage_collected(self, *args, **kwargs)
   2387       args, kwargs = None, None
   2388     with self._lock:
-> 2389       graph_function, _, _ = self._maybe_define_function(args, kwargs)
   2390     return graph_function
   2391 

~\Anaconda3\envs\Pushkar\lib\site-packages\tensorflow_core\python\eager\function.py in _maybe_define_function(self, args, kwargs)
   2701 
   2702       self._function_cache.missed.add(call_context_key)
-> 2703       graph_function = self._create_graph_function(args, kwargs)
   2704       self._function_cache.primary[cache_key] = graph_function
   2705       return graph_function, args, kwargs

~\Anaconda3\envs\Pushkar\lib\site-packages\tensorflow_core\python\eager\function.py in _create_graph_function(self, args, kwargs, override_flat_arg_shapes)
   2591             arg_names=arg_names,
   2592             override_flat_arg_shapes=override_flat_arg_shapes,
-> 2593             capture_by_value=self._capture_by_value),
   2594         self._function_attributes,
   2595         # Tell the ConcreteFunction to clean up its graph once it goes out of

~\Anaconda3\envs\Pushkar\lib\site-packages\tensorflow_core\python\framework\func_graph.py in func_graph_from_py_func(name, python_func, args, kwargs, signature, func_graph, autograph, autograph_options, add_control_dependencies, arg_names, op_return_value, collections, capture_by_value, override_flat_arg_shapes)
    976                                           converted_func)
    977 
--> 978       func_outputs = python_func(*func_args, **func_kwargs)
    979 
    980       # invariant: `func_outputs` contains only Tensors, CompositeTensors,

~\Anaconda3\envs\Pushkar\lib\site-packages\tensorflow_core\python\eager\def_function.py in wrapped_fn(*args, **kwds)
    437         # __wrapped__ allows AutoGraph to swap in a converted function. We give
    438         # the function a weak reference to itself to avoid a reference cycle.
--> 439         return weak_wrapped_fn().__wrapped__(*args, **kwds)
    440     weak_wrapped_fn = weakref.ref(wrapped_fn)
    441 

~\Anaconda3\envs\Pushkar\lib\site-packages\tensorflow_core\python\keras\engine\training_v2_utils.py in distributed_function(input_iterator)
     83     args = _prepare_feed_values(model, input_iterator, mode, strategy)
     84     outputs = strategy.experimental_run_v2(
---> 85         per_replica_function, args=args)
     86     # Out of PerReplica outputs reduce or pick values to return.
     87     all_outputs = dist_utils.unwrap_output_dict(

~\Anaconda3\envs\Pushkar\lib\site-packages\tensorflow_core\python\distribute\distribute_lib.py in experimental_run_v2(self, fn, args, kwargs)
    761       fn = autograph.tf_convert(fn, ag_ctx.control_status_ctx(),
    762                                 convert_by_default=False)
--> 763       return self._extended.call_for_each_replica(fn, args=args, kwargs=kwargs)
    764 
    765   def reduce(self, reduce_op, value, axis):

~\Anaconda3\envs\Pushkar\lib\site-packages\tensorflow_core\python\distribute\distribute_lib.py in call_for_each_replica(self, fn, args, kwargs)
   1817       kwargs = {}
   1818     with self._container_strategy().scope():
-> 1819       return self._call_for_each_replica(fn, args, kwargs)
   1820 
   1821   def _call_for_each_replica(self, fn, args, kwargs):

~\Anaconda3\envs\Pushkar\lib\site-packages\tensorflow_core\python\distribute\distribute_lib.py in _call_for_each_replica(self, fn, args, kwargs)
   2162         self._container_strategy(),
   2163         replica_id_in_sync_group=constant_op.constant(0, dtypes.int32)):
-> 2164       return fn(*args, **kwargs)
   2165 
   2166   def _reduce_to(self, reduce_op, value, destinations):

~\Anaconda3\envs\Pushkar\lib\site-packages\tensorflow_core\python\autograph\impl\api.py in wrapper(*args, **kwargs)
    290   def wrapper(*args, **kwargs):
    291     with ag_ctx.ControlStatusCtx(status=ag_ctx.Status.DISABLED):
--> 292       return func(*args, **kwargs)
    293 
    294   if inspect.isfunction(func) or inspect.ismethod(func):

~\Anaconda3\envs\Pushkar\lib\site-packages\tensorflow_core\python\keras\engine\training_v2_utils.py in train_on_batch(model, x, y, sample_weight, class_weight, reset_metrics, standalone)
    431       y,
    432       sample_weights=sample_weights,
--> 433       output_loss_metrics=model._output_loss_metrics)
    434 
    435   if reset_metrics:

~\Anaconda3\envs\Pushkar\lib\site-packages\tensorflow_core\python\keras\engine\training_eager.py in train_on_batch(model, inputs, targets, sample_weights, output_loss_metrics)
    310           sample_weights=sample_weights,
    311           training=True,
--> 312           output_loss_metrics=output_loss_metrics))
    313   if not isinstance(outs, list):
    314     outs = [outs]

~\Anaconda3\envs\Pushkar\lib\site-packages\tensorflow_core\python\keras\engine\training_eager.py in _process_single_batch(model, inputs, targets, output_loss_metrics, sample_weights, training)
    251               output_loss_metrics=output_loss_metrics,
    252               sample_weights=sample_weights,
--> 253               training=training))
    254       if total_loss is None:
    255         raise ValueError('The model cannot be run '

~\Anaconda3\envs\Pushkar\lib\site-packages\tensorflow_core\python\keras\engine\training_eager.py in _model_loss(model, inputs, targets, output_loss_metrics, sample_weights, training)
    165 
    166         if hasattr(loss_fn, 'reduction'):
--> 167           per_sample_losses = loss_fn.call(targets[i], outs[i])
    168           weighted_losses = losses_utils.compute_weighted_loss(
    169               per_sample_losses,

~\Anaconda3\envs\Pushkar\lib\site-packages\tensorflow_core\python\keras\losses.py in call(self, y_true, y_pred)
    219       y_pred, y_true = tf_losses_util.squeeze_or_expand_dimensions(
    220           y_pred, y_true)
--> 221     return self.fn(y_true, y_pred, **self._fn_kwargs)
    222 
    223   def get_config(self):

~\Anaconda3\envs\Pushkar\lib\site-packages\tensorflow_core\python\keras\losses.py in categorical_crossentropy(y_true, y_pred, from_logits, label_smoothing)
    969   y_true = smart_cond.smart_cond(label_smoothing,
    970                                  _smooth_labels, lambda: y_true)
--> 971   return K.categorical_crossentropy(y_true, y_pred, from_logits=from_logits)
    972 
    973 

~\Anaconda3\envs\Pushkar\lib\site-packages\tensorflow_core\python\keras\backend.py in categorical_crossentropy(target, output, from_logits, axis)
   4493       epsilon_ = _constant_to_tensor(epsilon(), output.dtype.base_dtype)
   4494       output = clip_ops.clip_by_value(output, epsilon_, 1. - epsilon_)
-> 4495       return -math_ops.reduce_sum(target * math_ops.log(output), axis)
   4496     else:
   4497       # When softmax activation function is used for output operation, we

~\Anaconda3\envs\Pushkar\lib\site-packages\tensorflow_core\python\ops\math_ops.py in binary_op_wrapper(x, y)
    900     with ops.name_scope(None, op_name, [x, y]) as name:
    901       if isinstance(x, ops.Tensor) and isinstance(y, ops.Tensor):
--> 902         return func(x, y, name=name)
    903       elif not isinstance(y, sparse_tensor.SparseTensor):
    904         try:

~\Anaconda3\envs\Pushkar\lib\site-packages\tensorflow_core\python\ops\math_ops.py in _mul_dispatch(x, y, name)
   1199   is_tensor_y = isinstance(y, ops.Tensor)
   1200   if is_tensor_y:
-> 1201     return gen_math_ops.mul(x, y, name=name)
   1202   else:
   1203     assert isinstance(y, sparse_tensor.SparseTensor)  # Case: Dense * Sparse.

~\Anaconda3\envs\Pushkar\lib\site-packages\tensorflow_core\python\ops\gen_math_ops.py in mul(x, y, name)
   6122   # Add nodes to the TensorFlow graph.
   6123   _, _, _op, _outputs = _op_def_library._apply_op_helper(
-> 6124         "Mul", x=x, y=y, name=name)
   6125   _result = _outputs[:]
   6126   if _execute.must_record_gradient():

~\Anaconda3\envs\Pushkar\lib\site-packages\tensorflow_core\python\framework\op_def_library.py in _apply_op_helper(op_type_name, name, **keywords)
    740       op = g._create_op_internal(op_type_name, inputs, dtypes=None,
    741                                  name=scope, input_types=input_types,
--> 742                                  attrs=attr_protos, op_def=op_def)
    743 
    744     # `outputs` is returned as a separate return value so that the output

~\Anaconda3\envs\Pushkar\lib\site-packages\tensorflow_core\python\framework\func_graph.py in _create_op_internal(self, op_type, inputs, dtypes, input_types, name, attrs, op_def, compute_device)
    593     return super(FuncGraph, self)._create_op_internal(  # pylint: disable=protected-access
    594         op_type, inputs, dtypes, input_types, name, attrs, op_def,
--> 595         compute_device)
    596 
    597   def capture(self, tensor, name=None, shape=None):

~\Anaconda3\envs\Pushkar\lib\site-packages\tensorflow_core\python\framework\ops.py in _create_op_internal(self, op_type, inputs, dtypes, input_types, name, attrs, op_def, compute_device)
   3320           input_types=input_types,
   3321           original_op=self._default_original_op,
-> 3322           op_def=op_def)
   3323       self._create_op_helper(ret, compute_device=compute_device)
   3324     return ret

~\Anaconda3\envs\Pushkar\lib\site-packages\tensorflow_core\python\framework\ops.py in __init__(self, node_def, g, inputs, output_types, control_inputs, input_types, original_op, op_def)
   1784           op_def, inputs, node_def.attr)
   1785       self._c_op = _create_c_op(self._graph, node_def, grouped_inputs,
-> 1786                                 control_input_ops)
   1787       name = compat.as_str(node_def.name)
   1788     # pylint: enable=protected-access

~\Anaconda3\envs\Pushkar\lib\site-packages\tensorflow_core\python\framework\ops.py in _create_c_op(graph, node_def, inputs, control_inputs)
   1620   except errors.InvalidArgumentError as e:
   1621     # Convert to ValueError for backwards compatibility.
-> 1622     raise ValueError(str(e))
   1623 
   1624   return c_op

ValueError: Dimensions must be equal, but are 13 and 3027 for 'loss/dense_loss/mul' (op: 'Mul') with input shapes: [?,13], [?,13,3027].

I think the issue is in the input layers, specifically where you use the following:

enc_inp = Input(shape=(13, ))
dec_inp = Input(shape=(13, ))

The reason is that the Embedding layer can itself act as the input layer, and what you should pass in is a vector with the dimension of the vocabulary (I assume it is 3027), all zeros except for the words that are present, which are set to one (1); then take the Input layers out of the model.

I think that solves the incompatibility between the dimensions.
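
For reference, here is a minimal, hedged sketch of another way to reconcile the two shapes, working on the loss and targets rather than the input layers (it reuses the variable names from the question): categorical_crossentropy expects one-hot targets with the same shape as the model output, [?, 13, VOCAB_SIZE], whereas sparse_categorical_crossentropy accepts the integer-encoded targets of shape [?, 13] as they are.

# Option A (sketch): keep the integer targets and use the sparse loss.
model.compile(loss='sparse_categorical_crossentropy', metrics=['acc'], optimizer='adam')
model.fit([encoder_inp, decoder_inp], decoder_final_output, epochs=4)

# Option B (sketch): one-hot encode the targets to match the output shape
# expected by categorical_crossentropy. Note this materializes a
# (30000, 13, VOCAB_SIZE) array, which is very large in memory.
from tensorflow.keras.utils import to_categorical

one_hot_targets = to_categorical(decoder_final_output, num_classes=VOCAB_SIZE)
model.compile(loss='categorical_crossentropy', metrics=['acc'], optimizer='adam')
model.fit([encoder_inp, decoder_inp], one_hot_targets, epochs=4)

Either variant removes the [?,13] vs. [?,13,3027] mismatch; at this dataset size the sparse loss is usually the more practical choice.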
