ValueError：維度必須相等，但 “loss/dense_1_loss/mul”（操作：“Mul”）的兩個維度分別是 13 和 3076，輸入形狀為 [?,13]、[?,13,3076]

Question

我正在基於 seq2seq 模型構建一個聊天機器人，但訓練時出現維度不匹配的錯誤，請幫忙。這是 .py 文件： https://drive.google.com/drive/u/0/folders/1ljeLb_LYO6qd9azqly31T3luv0oACXfO

import re


# Field separator used by both raw dialogue files.
FIELD_SEP = ' +++$+++ '

# Read both files through context managers so the handles are closed
# deterministically instead of being left to the garbage collector.
with open('movie_lines.txt', encoding='utf-8', errors='ignore') as lines_file:
    lines = lines_file.read().split('\n')
with open('movie_conversations.txt', encoding='utf-8', errors='ignore') as converse_file:
    converse = converse_file.read().split('\n')

# Each conversation record ends with a bracketed, quoted, comma-separated
# list of line ids; strip the brackets/quotes/commas and keep the bare ids.
exchn = []
for conver in converse:
    id_field = conver.split(FIELD_SEP)[-1][1:-1]
    exchn.append(id_field.replace("'", " ").replace(",", "").split())

# Map line id (first field) -> utterance text (last field).
diag = {}
for line in lines:
    fields = line.split(FIELD_SEP)
    diag[fields[0]] = fields[-1]

# Every pair of consecutive utterances in a conversation becomes one
# (question, answer) training example.
questions = []
answers = []
for conver in exchn:
    for prev_id, next_id in zip(conver, conver[1:]):
        questions.append(diag[prev_id])
        answers.append(diag[next_id])

# Keep only the pairs whose question is short.
# NOTE(review): len() here is the character length of the raw question,
# not its word count -- confirm that this is the intended filter.
sorted_ques = []
sorted_ans = []
for question, answer in zip(questions, answers):
    if len(question) < 13:
        sorted_ques.append(question)
        sorted_ans.append(answer)



def clean_text(txt):
    """Normalise one utterance for tokenisation.

    The text is lower-cased, a fixed list of English contractions is
    expanded, and finally every character that is neither a word
    character nor whitespace is removed.

    Args:
        txt: Raw utterance string.

    Returns:
        The cleaned string.
    """
    # Applied strictly in this order: e.g. "he's" runs before "she's",
    # and punctuation is stripped last so the apostrophes are still
    # present while the contraction patterns are being matched.
    substitutions = (
        (r"i'm", "i am"),
        (r"he's", "he is"),
        (r"she's", "she is"),
        (r"that's", "that is"),
        (r"what's", "what is"),
        (r"where's", "where is"),
        (r"\'ll", " will"),
        (r"\'ve", " have"),
        (r"\'re", " are"),
        (r"\'d", " would"),
        (r"won't", "will not"),
        (r"can't", "can not"),
        (r"[^\w\s]", ""),
    )
    cleaned = txt.lower()
    for pattern, replacement in substitutions:
        cleaned = re.sub(pattern, replacement, cleaned)
    return cleaned

# Normalise every kept question and answer with clean_text().
clean_ques = [clean_text(sentence) for sentence in sorted_ques]
clean_ans = [clean_text(sentence) for sentence in sorted_ans]

# The unfiltered pair lists and the leftover loop variable are no longer
# needed; drop them to free memory.
del answers, questions, line

# Cap every answer at its first 11 words.
clean_ans = [' '.join(sentence.split()[:11]) for sentence in clean_ans]

# Work with at most the first 30000 question/answer pairs.
clean_ans = clean_ans[:30000]
clean_ques = clean_ques[:30000]
## delete


###  count occurrences ###
# Frequency of every whitespace-separated word across questions and
# answers (questions are scanned first, so first-seen order is kept).
word2count = {}

for line in clean_ques + clean_ans:
    for word in line.split():
        word2count[word] = word2count.get(word, 0) + 1

# Drop the loop temporaries.
del word, line


###  keep only frequent words ###
# A word gets its own id only if it occurs at least `thresh` times;
# ids are handed out consecutively from 0 in word2count iteration order.
thresh = 5

vocab = {}
word_num = 0
for word, count in word2count.items():
    if count < thresh:
        continue
    vocab[word] = word_num
    word_num += 1

# Drop the counting temporaries.
del word2count, word, count, thresh
del word_num



# Wrap every answer in start/end-of-sequence markers.
clean_ans = ['<SOS> ' + answer + ' <EOS>' for answer in clean_ans]

# Append the special tokens after the regular words.
tokens = ['<PAD>', '<EOS>', '<OUT>', '<SOS>']
x = len(vocab)
for token in tokens:
    vocab[token] = x
    x += 1

# '<PAD>' must own id 0. Whichever word currently holds id 0 is moved to
# the slot '<PAD>' was just given, so all ids stay unique. The original
# code hard-coded that word as 'cameron', which is only correct when
# 'cameron' happens to be the first frequent word; otherwise some other
# word silently shared id 0 with '<PAD>'.
pad_slot = vocab['<PAD>']
displaced_words = [
    word for word, idx in vocab.items() if idx == 0 and word != '<PAD>'
]
for displaced_word in displaced_words:
    vocab[displaced_word] = pad_slot
vocab['<PAD>'] = 0


### inverse vocabulary: id -> token ###
inv_vocab = {idx: word for word, idx in vocab.items()}




# Id used for any word that has no vocabulary entry of its own.
out_id = vocab['<OUT>']

# Questions -> lists of token ids (encoder input).
encoder_inp = [
    [vocab.get(word, out_id) for word in line.split()]
    for line in clean_ques
]

# Answers -> lists of token ids (decoder input).
decoder_inp = [
    [vocab.get(word, out_id) for word in line.split()]
    for line in clean_ans
]

from tensorflow.keras.preprocessing.sequence import pad_sequences
# Bring every id sequence to exactly 13 entries, padding/truncating at
# the end.
encoder_inp = pad_sequences(encoder_inp, maxlen=13, padding='post', truncating='post')
decoder_inp = pad_sequences(decoder_inp, maxlen=13, padding='post', truncating='post')

# Training target: the decoder input shifted left by one position
# (first token dropped), then padded back to length 13.
shifted_rows = [row[1:] for row in decoder_inp]
decoder_final_output = pad_sequences(shifted_rows, maxlen=13, padding='post', truncating='post')

from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense, Embedding, LSTM, Input


# Encoder and decoder each take a sequence of 13 token ids.
enc_inp = Input(shape=(13, ))
dec_inp = Input(shape=(13, ))


# One embedding layer, applied to both the encoder and the decoder input
# below, maps each token id to a 50-dimensional trainable vector.
VOCAB_SIZE = len(vocab)
embed = Embedding(VOCAB_SIZE+1, output_dim=50, 
                  input_length=13,
                  trainable=True                  
                  )


# Encoder: only its final hidden and cell states (h, c) are used further
# down; the per-step output enc_op is not referenced again in this script.
enc_embed = embed(enc_inp)
enc_lstm = LSTM(400, return_sequences=True, return_state=True)
enc_op, h, c = enc_lstm(enc_embed)
enc_states = [h, c]



# Decoder: starts from the encoder's final states and returns one output
# vector per time step; its own final states are discarded here.
dec_embed = embed(dec_inp)
dec_lstm = LSTM(400, return_sequences=True, return_state=True)
dec_op, _, _ = dec_lstm(dec_embed, initial_state=enc_states)

# Softmax over VOCAB_SIZE classes for every decoder time step.
dense = Dense(VOCAB_SIZE, activation='softmax')

dense_op = dense(dec_op)

# Training model: (encoder ids, decoder ids) -> per-step token probabilities.
model = Model([enc_inp, dec_inp], dense_op)




# The targets in decoder_final_output are integer token ids of shape
# (samples, 13), while the model emits probabilities of shape
# (samples, 13, VOCAB_SIZE). 'categorical_crossentropy' expects one-hot
# targets with the same shape as the output, which is what raised
# "Dimensions must be equal, but are 13 and <VOCAB_SIZE>".
# 'sparse_categorical_crossentropy' accepts the integer ids directly and
# avoids materialising a (samples, 13, VOCAB_SIZE) one-hot array.
model.compile(loss='sparse_categorical_crossentropy', metrics=['acc'], optimizer='adam')

model.fit([encoder_inp, decoder_inp], decoder_final_output, epochs=4)

錯誤是：維度必須相等，但 'loss/dense_loss/mul'（操作：'Mul'）的兩個維度分別是 13 和 3027，輸入形狀為 [?,13]、[?,13,3027]。


Train on 30000 samples
Epoch 1/4
   32/30000 [..............................] - ETA: 9:26
---------------------------------------------------------------------------
InvalidArgumentError                      Traceback (most recent call last)
~\Anaconda3\envs\Pushkar\lib\site-packages\tensorflow_core\python\framework\ops.py in _create_c_op(graph, node_def, inputs, control_inputs)
   1618   try:
-> 1619     c_op = c_api.TF_FinishOperation(op_desc)
   1620   except errors.InvalidArgumentError as e:

InvalidArgumentError: Dimensions must be equal, but are 13 and 3027 for 'loss/dense_loss/mul' (op: 'Mul') with input shapes: [?,13], [?,13,3027].

During handling of the above exception, another exception occurred:

ValueError                                Traceback (most recent call last)
<ipython-input-5-75c3ca63e6a3> in <module>
    201 model.compile(loss='categorical_crossentropy',metrics=['acc'],optimizer='adam')
    202 
--> 203 model.fit([encoder_inp, decoder_inp],decoder_final_output,epochs=4)

~\Anaconda3\envs\Pushkar\lib\site-packages\tensorflow_core\python\keras\engine\training.py in fit(self, x, y, batch_size, epochs, verbose, callbacks, validation_split, validation_data, shuffle, class_weight, sample_weight, initial_epoch, steps_per_epoch, validation_steps, validation_freq, max_queue_size, workers, use_multiprocessing, **kwargs)
    817         max_queue_size=max_queue_size,
    818         workers=workers,
--> 819         use_multiprocessing=use_multiprocessing)
    820 
    821   def evaluate(self,

~\Anaconda3\envs\Pushkar\lib\site-packages\tensorflow_core\python\keras\engine\training_v2.py in fit(self, model, x, y, batch_size, epochs, verbose, callbacks, validation_split, validation_data, shuffle, class_weight, sample_weight, initial_epoch, steps_per_epoch, validation_steps, validation_freq, max_queue_size, workers, use_multiprocessing, **kwargs)
    340                 mode=ModeKeys.TRAIN,
    341                 training_context=training_context,
--> 342                 total_epochs=epochs)
    343             cbks.make_logs(model, epoch_logs, training_result, ModeKeys.TRAIN)
    344 

~\Anaconda3\envs\Pushkar\lib\site-packages\tensorflow_core\python\keras\engine\training_v2.py in run_one_epoch(model, iterator, execution_function, dataset_size, batch_size, strategy, steps_per_epoch, num_samples, mode, training_context, total_epochs)
    126         step=step, mode=mode, size=current_batch_size) as batch_logs:
    127       try:
--> 128         batch_outs = execution_function(iterator)
    129       except (StopIteration, errors.OutOfRangeError):
    130         # TODO(kaftan): File bug about tf function and errors.OutOfRangeError?

~\Anaconda3\envs\Pushkar\lib\site-packages\tensorflow_core\python\keras\engine\training_v2_utils.py in execution_function(input_fn)
     96     # `numpy` translates Tensors to values in Eager mode.
     97     return nest.map_structure(_non_none_constant_value,
---> 98                               distributed_function(input_fn))
     99 
    100   return execution_function

~\Anaconda3\envs\Pushkar\lib\site-packages\tensorflow_core\python\eager\def_function.py in __call__(self, *args, **kwds)
    566         xla_context.Exit()
    567     else:
--> 568       result = self._call(*args, **kwds)
    569 
    570     if tracing_count == self._get_tracing_count():

~\Anaconda3\envs\Pushkar\lib\site-packages\tensorflow_core\python\eager\def_function.py in _call(self, *args, **kwds)
    613       # This is the first call of __call__, so we have to initialize.
    614       initializers = []
--> 615       self._initialize(args, kwds, add_initializers_to=initializers)
    616     finally:
    617       # At this point we know that the initialization is complete (or less

~\Anaconda3\envs\Pushkar\lib\site-packages\tensorflow_core\python\eager\def_function.py in _initialize(self, args, kwds, add_initializers_to)
    495     self._concrete_stateful_fn = (
    496         self._stateful_fn._get_concrete_function_internal_garbage_collected(  # pylint: disable=protected-access
--> 497             *args, **kwds))
    498 
    499     def invalid_creator_scope(*unused_args, **unused_kwds):

~\Anaconda3\envs\Pushkar\lib\site-packages\tensorflow_core\python\eager\function.py in _get_concrete_function_internal_garbage_collected(self, *args, **kwargs)
   2387       args, kwargs = None, None
   2388     with self._lock:
-> 2389       graph_function, _, _ = self._maybe_define_function(args, kwargs)
   2390     return graph_function
   2391 

~\Anaconda3\envs\Pushkar\lib\site-packages\tensorflow_core\python\eager\function.py in _maybe_define_function(self, args, kwargs)
   2701 
   2702       self._function_cache.missed.add(call_context_key)
-> 2703       graph_function = self._create_graph_function(args, kwargs)
   2704       self._function_cache.primary[cache_key] = graph_function
   2705       return graph_function, args, kwargs

~\Anaconda3\envs\Pushkar\lib\site-packages\tensorflow_core\python\eager\function.py in _create_graph_function(self, args, kwargs, override_flat_arg_shapes)
   2591             arg_names=arg_names,
   2592             override_flat_arg_shapes=override_flat_arg_shapes,
-> 2593             capture_by_value=self._capture_by_value),
   2594         self._function_attributes,
   2595         # Tell the ConcreteFunction to clean up its graph once it goes out of

~\Anaconda3\envs\Pushkar\lib\site-packages\tensorflow_core\python\framework\func_graph.py in func_graph_from_py_func(name, python_func, args, kwargs, signature, func_graph, autograph, autograph_options, add_control_dependencies, arg_names, op_return_value, collections, capture_by_value, override_flat_arg_shapes)
    976                                           converted_func)
    977 
--> 978       func_outputs = python_func(*func_args, **func_kwargs)
    979 
    980       # invariant: `func_outputs` contains only Tensors, CompositeTensors,

~\Anaconda3\envs\Pushkar\lib\site-packages\tensorflow_core\python\eager\def_function.py in wrapped_fn(*args, **kwds)
    437         # __wrapped__ allows AutoGraph to swap in a converted function. We give
    438         # the function a weak reference to itself to avoid a reference cycle.
--> 439         return weak_wrapped_fn().__wrapped__(*args, **kwds)
    440     weak_wrapped_fn = weakref.ref(wrapped_fn)
    441 

~\Anaconda3\envs\Pushkar\lib\site-packages\tensorflow_core\python\keras\engine\training_v2_utils.py in distributed_function(input_iterator)
     83     args = _prepare_feed_values(model, input_iterator, mode, strategy)
     84     outputs = strategy.experimental_run_v2(
---> 85         per_replica_function, args=args)
     86     # Out of PerReplica outputs reduce or pick values to return.
     87     all_outputs = dist_utils.unwrap_output_dict(

~\Anaconda3\envs\Pushkar\lib\site-packages\tensorflow_core\python\distribute\distribute_lib.py in experimental_run_v2(self, fn, args, kwargs)
    761       fn = autograph.tf_convert(fn, ag_ctx.control_status_ctx(),
    762                                 convert_by_default=False)
--> 763       return self._extended.call_for_each_replica(fn, args=args, kwargs=kwargs)
    764 
    765   def reduce(self, reduce_op, value, axis):

~\Anaconda3\envs\Pushkar\lib\site-packages\tensorflow_core\python\distribute\distribute_lib.py in call_for_each_replica(self, fn, args, kwargs)
   1817       kwargs = {}
   1818     with self._container_strategy().scope():
-> 1819       return self._call_for_each_replica(fn, args, kwargs)
   1820 
   1821   def _call_for_each_replica(self, fn, args, kwargs):

~\Anaconda3\envs\Pushkar\lib\site-packages\tensorflow_core\python\distribute\distribute_lib.py in _call_for_each_replica(self, fn, args, kwargs)
   2162         self._container_strategy(),
   2163         replica_id_in_sync_group=constant_op.constant(0, dtypes.int32)):
-> 2164       return fn(*args, **kwargs)
   2165 
   2166   def _reduce_to(self, reduce_op, value, destinations):

~\Anaconda3\envs\Pushkar\lib\site-packages\tensorflow_core\python\autograph\impl\api.py in wrapper(*args, **kwargs)
    290   def wrapper(*args, **kwargs):
    291     with ag_ctx.ControlStatusCtx(status=ag_ctx.Status.DISABLED):
--> 292       return func(*args, **kwargs)
    293 
    294   if inspect.isfunction(func) or inspect.ismethod(func):

~\Anaconda3\envs\Pushkar\lib\site-packages\tensorflow_core\python\keras\engine\training_v2_utils.py in train_on_batch(model, x, y, sample_weight, class_weight, reset_metrics, standalone)
    431       y,
    432       sample_weights=sample_weights,
--> 433       output_loss_metrics=model._output_loss_metrics)
    434 
    435   if reset_metrics:

~\Anaconda3\envs\Pushkar\lib\site-packages\tensorflow_core\python\keras\engine\training_eager.py in train_on_batch(model, inputs, targets, sample_weights, output_loss_metrics)
    310           sample_weights=sample_weights,
    311           training=True,
--> 312           output_loss_metrics=output_loss_metrics))
    313   if not isinstance(outs, list):
    314     outs = [outs]

~\Anaconda3\envs\Pushkar\lib\site-packages\tensorflow_core\python\keras\engine\training_eager.py in _process_single_batch(model, inputs, targets, output_loss_metrics, sample_weights, training)
    251               output_loss_metrics=output_loss_metrics,
    252               sample_weights=sample_weights,
--> 253               training=training))
    254       if total_loss is None:
    255         raise ValueError('The model cannot be run '

~\Anaconda3\envs\Pushkar\lib\site-packages\tensorflow_core\python\keras\engine\training_eager.py in _model_loss(model, inputs, targets, output_loss_metrics, sample_weights, training)
    165 
    166         if hasattr(loss_fn, 'reduction'):
--> 167           per_sample_losses = loss_fn.call(targets[i], outs[i])
    168           weighted_losses = losses_utils.compute_weighted_loss(
    169               per_sample_losses,

~\Anaconda3\envs\Pushkar\lib\site-packages\tensorflow_core\python\keras\losses.py in call(self, y_true, y_pred)
    219       y_pred, y_true = tf_losses_util.squeeze_or_expand_dimensions(
    220           y_pred, y_true)
--> 221     return self.fn(y_true, y_pred, **self._fn_kwargs)
    222 
    223   def get_config(self):

~\Anaconda3\envs\Pushkar\lib\site-packages\tensorflow_core\python\keras\losses.py in categorical_crossentropy(y_true, y_pred, from_logits, label_smoothing)
    969   y_true = smart_cond.smart_cond(label_smoothing,
    970                                  _smooth_labels, lambda: y_true)
--> 971   return K.categorical_crossentropy(y_true, y_pred, from_logits=from_logits)
    972 
    973 

~\Anaconda3\envs\Pushkar\lib\site-packages\tensorflow_core\python\keras\backend.py in categorical_crossentropy(target, output, from_logits, axis)
   4493       epsilon_ = _constant_to_tensor(epsilon(), output.dtype.base_dtype)
   4494       output = clip_ops.clip_by_value(output, epsilon_, 1. - epsilon_)
-> 4495       return -math_ops.reduce_sum(target * math_ops.log(output), axis)
   4496     else:
   4497       # When softmax activation function is used for output operation, we

~\Anaconda3\envs\Pushkar\lib\site-packages\tensorflow_core\python\ops\math_ops.py in binary_op_wrapper(x, y)
    900     with ops.name_scope(None, op_name, [x, y]) as name:
    901       if isinstance(x, ops.Tensor) and isinstance(y, ops.Tensor):
--> 902         return func(x, y, name=name)
    903       elif not isinstance(y, sparse_tensor.SparseTensor):
    904         try:

~\Anaconda3\envs\Pushkar\lib\site-packages\tensorflow_core\python\ops\math_ops.py in _mul_dispatch(x, y, name)
   1199   is_tensor_y = isinstance(y, ops.Tensor)
   1200   if is_tensor_y:
-> 1201     return gen_math_ops.mul(x, y, name=name)
   1202   else:
   1203     assert isinstance(y, sparse_tensor.SparseTensor)  # Case: Dense * Sparse.

~\Anaconda3\envs\Pushkar\lib\site-packages\tensorflow_core\python\ops\gen_math_ops.py in mul(x, y, name)
   6122   # Add nodes to the TensorFlow graph.
   6123   _, _, _op, _outputs = _op_def_library._apply_op_helper(
-> 6124         "Mul", x=x, y=y, name=name)
   6125   _result = _outputs[:]
   6126   if _execute.must_record_gradient():

~\Anaconda3\envs\Pushkar\lib\site-packages\tensorflow_core\python\framework\op_def_library.py in _apply_op_helper(op_type_name, name, **keywords)
    740       op = g._create_op_internal(op_type_name, inputs, dtypes=None,
    741                                  name=scope, input_types=input_types,
--> 742                                  attrs=attr_protos, op_def=op_def)
    743 
    744     # `outputs` is returned as a separate return value so that the output

~\Anaconda3\envs\Pushkar\lib\site-packages\tensorflow_core\python\framework\func_graph.py in _create_op_internal(self, op_type, inputs, dtypes, input_types, name, attrs, op_def, compute_device)
    593     return super(FuncGraph, self)._create_op_internal(  # pylint: disable=protected-access
    594         op_type, inputs, dtypes, input_types, name, attrs, op_def,
--> 595         compute_device)
    596 
    597   def capture(self, tensor, name=None, shape=None):

~\Anaconda3\envs\Pushkar\lib\site-packages\tensorflow_core\python\framework\ops.py in _create_op_internal(self, op_type, inputs, dtypes, input_types, name, attrs, op_def, compute_device)
   3320           input_types=input_types,
   3321           original_op=self._default_original_op,
-> 3322           op_def=op_def)
   3323       self._create_op_helper(ret, compute_device=compute_device)
   3324     return ret

~\Anaconda3\envs\Pushkar\lib\site-packages\tensorflow_core\python\framework\ops.py in __init__(self, node_def, g, inputs, output_types, control_inputs, input_types, original_op, op_def)
   1784           op_def, inputs, node_def.attr)
   1785       self._c_op = _create_c_op(self._graph, node_def, grouped_inputs,
-> 1786                                 control_input_ops)
   1787       name = compat.as_str(node_def.name)
   1788     # pylint: enable=protected-access

~\Anaconda3\envs\Pushkar\lib\site-packages\tensorflow_core\python\framework\ops.py in _create_c_op(graph, node_def, inputs, control_inputs)
   1620   except errors.InvalidArgumentError as e:
   1621     # Convert to ValueError for backwards compatibility.
-> 1622     raise ValueError(str(e))
   1623 
   1624   return c_op

ValueError: Dimensions must be equal, but are 13 and 3027 for 'loss/dense_loss/mul' (op: 'Mul') with input shapes: [?,13], [?,13,3027].

Answer 1

我認為問題出在輸入層，特別是您使用以下寫法時：

enc_inp = Input(shape=(13, ))
dec_inp = Input(shape=(13, ))

原因是 Embedding 層本身就可以充當輸入層；您應該傳入的是一個長度等於詞彙表大小（我假設是 3027）的向量，其中只有當前單詞對應的位置為 1，其餘全為 0（即 one-hot 向量），然後把 Input 層從模型中移除。

我認為這樣可以解決維度不相容的問題。

ValueError：尺寸必須相等，但對於輸入形狀為 [?,13]、[?,13,3076] 的“loss/dense_1_loss/mul”（操作：“Mul”），尺寸必須是 13 和 3076

問題描述

1 個解決方案

解決方案1
0 2021-01-28 22:08:48

ValueError：尺寸必須相等，但對於輸入形狀為 [?,13]、[?,13,3076] 的“loss/dense_1_loss/mul”（操作：“Mul”），尺寸必須是 13 和 3076

問題描述

1 個解決方案

解決方案1 0 2021-01-28 22:08:48

解決方案1
0 2021-01-28 22:08:48