![](/img/trans.png)
[英]ValueError: No gradients provided for any variable - Tensorflow 2.0
[英]No gradients provided for any variable in tensorflow2.0
當我嘗試使用 TensorFlow 2.0,根據 TensorFlow 發布的官方指南創建一個 Transformer 模型時,遇到了一個問題:當我添加一個全連接網絡之後,分類損失和翻譯損失似乎各自都能對部分變量求出梯度。
但是一旦我把這兩個損失相加,所有變量的梯度都會消失。我毫無頭緒,為了解決這個問題已經嘗試了好幾個星期。誰能給我一些建議?
# NOTE(review): the indentation of this snippet was lost on the page; the
# layout below is reconstructed (loss sum placed after the tape context, as the
# reported error implies) — confirm against the original post.
@tf.function(input_signature=train_step_signature)
def train_step(group, inp, tar, label):
    """Run one training step, exactly as posted in the question.

    The code is intentionally left unfixed: it is the version that raises
    "ValueError: No gradients provided for any variable".

    Args:
        group: Not used anywhere in this function body.
        inp: Passed to ``create_masks`` and to ``transformer``.
        tar: Sliced into ``tar_inp`` (model input) and ``tar_real`` (target
            handed to ``loss_function`` and ``train_accuracy``).
        label: First argument of ``tf.nn.softmax_cross_entropy_with_logits``,
            paired with the model's first output.

    Returns:
        Nothing; ``loss2``, ``loss`` and ``(tar_real, predictions)`` are passed
        to ``class_loss``, ``train_loss`` and ``train_accuracy``, which are
        defined outside this snippet.
    """
    tar_inp = tar[:, :-1]  # everything except the last position along axis 1
    tar_real = tar[:, 1:]  # everything except the first position along axis 1
    enc_padding_mask, combined_mask, dec_padding_mask = create_masks(inp, tar_inp)
    with tf.GradientTape(persistent=True) as tape:
        # The model returns three values here; the third one is discarded.
        classfication, predictions, _ = transformer(inp, tar_inp,
                                                    True,
                                                    enc_padding_mask,
                                                    combined_mask,
                                                    dec_padding_mask)
        loss = loss_function(tar_real, predictions)
        loss2 = tf.nn.softmax_cross_entropy_with_logits(label, classfication)
    #print(loss,loss2)
    # NOTE(review): `trainsformer` / `trainable_variable` are spelled
    # differently from `transformer.trainable_variables` used on the next
    # lines, and `a` is never read afterwards.
    a=tape.gradient(loss,trainsformer.trainable_variable)
    # `loss+loss2` is evaluated here, after the `with` block has closed, so the
    # tape has no record of the addition; this is the source of the reported
    # "No gradients provided for any variable" error.
    gradients = tape.gradient(loss+loss2, transformer.trainable_variables)
    optimizer.apply_gradients(zip(gradients, transformer.trainable_variables))
    class_loss(loss2)
    train_loss(loss)
    train_accuracy(tar_real, predictions)
以下是我的錯誤信息
ValueError Traceback (most recent call last)
<ipython-input-2-81054f0385cb> in <module>()
999 # inp -> portuguese, tar -> english
1000 for (batch, (group, inp, tar, label)) in enumerate(train_dataset):
-> 1001 train_step(group, inp, tar, label)
1002 if batch % 50 == 0:
1003 print(
8 frames
/usr/local/lib/python3.6/dist-packages/tensorflow_core/python/eager/def_function.py in __call__(self, *args, **kwds)
455
456 tracing_count = self._get_tracing_count()
--> 457 result = self._call(*args, **kwds)
458 if tracing_count == self._get_tracing_count():
459 self._call_counter.called_without_tracing()
/usr/local/lib/python3.6/dist-packages/tensorflow_core/python/eager/def_function.py in _call(self, *args, **kwds)
501 # This is the first call of __call__, so we have to initialize.
502 initializer_map = object_identity.ObjectIdentityDictionary()
--> 503 self._initialize(args, kwds, add_initializers_to=initializer_map)
504 finally:
505 # At this point we know that the initialization is complete (or less
/usr/local/lib/python3.6/dist-packages/tensorflow_core/python/eager/def_function.py in _initialize(self, args, kwds, add_initializers_to)
406 self._concrete_stateful_fn = (
407 self._stateful_fn._get_concrete_function_internal_garbage_collected( # pylint: disable=protected-access
--> 408 *args, **kwds))
409
410 def invalid_creator_scope(*unused_args, **unused_kwds):
/usr/local/lib/python3.6/dist-packages/tensorflow_core/python/eager/function.py in _get_concrete_function_internal_garbage_collected(self, *args, **kwargs)
1846 if self.input_signature:
1847 args, kwargs = None, None
-> 1848 graph_function, _, _ = self._maybe_define_function(args, kwargs)
1849 return graph_function
1850
/usr/local/lib/python3.6/dist-packages/tensorflow_core/python/eager/function.py in _maybe_define_function(self, args, kwargs)
2148 graph_function = self._function_cache.primary.get(cache_key, None)
2149 if graph_function is None:
-> 2150 graph_function = self._create_graph_function(args, kwargs)
2151 self._function_cache.primary[cache_key] = graph_function
2152 return graph_function, args, kwargs
/usr/local/lib/python3.6/dist-packages/tensorflow_core/python/eager/function.py in _create_graph_function(self, args, kwargs, override_flat_arg_shapes)
2039 arg_names=arg_names,
2040 override_flat_arg_shapes=override_flat_arg_shapes,
-> 2041 capture_by_value=self._capture_by_value),
2042 self._function_attributes,
2043 # Tell the ConcreteFunction to clean up its graph once it goes out of
/usr/local/lib/python3.6/dist-packages/tensorflow_core/python/framework/func_graph.py in func_graph_from_py_func(name, python_func, args, kwargs, signature, func_graph, autograph, autograph_options, add_control_dependencies, arg_names, op_return_value, collections, capture_by_value, override_flat_arg_shapes)
913 converted_func)
914
--> 915 func_outputs = python_func(*func_args, **func_kwargs)
916
917 # invariant: `func_outputs` contains only Tensors, CompositeTensors,
/usr/local/lib/python3.6/dist-packages/tensorflow_core/python/eager/def_function.py in wrapped_fn(*args, **kwds)
356 # __wrapped__ allows AutoGraph to swap in a converted function. We give
357 # the function a weak reference to itself to avoid a reference cycle.
--> 358 return weak_wrapped_fn().__wrapped__(*args, **kwds)
359 weak_wrapped_fn = weakref.ref(wrapped_fn)
360
/usr/local/lib/python3.6/dist-packages/tensorflow_core/python/framework/func_graph.py in wrapper(*args, **kwargs)
903 except Exception as e: # pylint:disable=broad-except
904 if hasattr(e, "ag_error_metadata"):
--> 905 raise e.ag_error_metadata.to_exception(e)
906 else:
907 raise
ValueError: in converted code:
<ipython-input-1-81054f0385cb>:856 train_step *
optimizer.apply_gradients(zip(gradients, transformer.trainable_variables))
/usr/local/lib/python3.6/dist-packages/tensorflow_core/python/keras/optimizer_v2/optimizer_v2.py:427 apply_gradients
grads_and_vars = _filter_grads(grads_and_vars)
/usr/local/lib/python3.6/dist-packages/tensorflow_core/python/keras/optimizer_v2/optimizer_v2.py:1025 _filter_grads
([v.name for _, v in grads_and_vars],))
ValueError: No gradients provided for any variable: ['transformer_1/encoder_1/embedding_2/embeddings:0', 'transformer_1/encoder_1/encoder_layer_6/multi_head_attention_18/dense_98/kernel:0', 'transformer_1/encoder_1/encoder_layer_6/multi_head_attention_18/dense_98/bias:0', 'transformer_1/encoder_1/encoder_layer_6/multi_head_attention_18/dense_99/kernel:0', 'transformer_1/encoder_1/encoder_layer_6/multi_head_attention_18/dense_99/bias:0', 'transformer_1/encoder_1/encoder_layer_6/multi_head_attention_18/dense_100/kernel:0', 'transformer_1/encoder_1/encoder_layer_6/multi_head_attention_18/dense_100/bias:0', 'transformer_1/encoder_1/encoder_layer_6/multi_head_attention_18/dense_101/kernel:0', 'transformer_1/encoder_1/encoder_layer_6/multi_head_attention_18/dense_101/bias:0', 'transformer_1/encoder_1/encoder_layer_6/sequential_12/dense_102/kernel:0', 'transformer_1/encoder_1/encoder_layer_6/sequential_12/dense_102/bias:0', 'transformer_1/encoder_1/encoder_layer_6/sequential_12/dense_103/kernel:0', 'transformer_1/encoder_1/encoder_layer_6/sequential_12/dense_103/bias:0', 'transformer_1/encoder_1/encoder_layer_6/layer_normalization_30/gamma:0', 'transformer_1/encoder_1/encoder_layer_6/layer_normalization_30/beta:0', 'transformer_1/encoder_1/encoder_layer_6/layer_normalization_31/gamma:0', 'transformer_1/encoder_1/encoder_layer_6/layer_normalization_31/beta:0', 'transformer_1/encoder_1/encoder_layer_7/multi_head_attention_19/dense_104/kernel:0', 'transformer_1/encoder_1/encoder...
是的,這是 GradientTape 一個有點煩人的地方。您不能在 tape 的上下文(`with ...` 區塊)之外對張量做任何運算,否則 tape 就會「追蹤不到」這些運算。只要把相加的操作移到上下文之內即可修復:
# Corrected excerpt of the training step: the two losses are summed while the
# tape is still recording, so the gradient of the combined loss can be traced
# back to the model variables.
with tf.GradientTape() as tape:
    classfication, predictions, _ = transformer(inp, tar_inp,
                                                True,
                                                enc_padding_mask,
                                                combined_mask,
                                                dec_padding_mask)
    loss = loss_function(tar_real, predictions)
    loss2 = tf.nn.softmax_cross_entropy_with_logits(label, classfication)
    # NOTE(review): softmax_cross_entropy_with_logits yields one value per
    # example; if loss_function returns a scalar, reducing loss2 (e.g. with
    # tf.reduce_mean) before adding may be intended — confirm.
    # The addition must stay inside the `with` block: operations executed
    # after the context exits are not recorded by the tape.
    added_loss = loss + loss2
# The question's debugging call `a=tape.gradient(loss, ...)` is dropped: its
# result was never used and the names it referenced were misspelled. With a
# single gradient() call left, a non-persistent tape is enough; pass
# persistent=True again if more than one gradient() call is needed.
gradients = tape.gradient(added_loss, transformer.trainable_variables)
聲明:本站的技術帖子網頁,遵循CC BY-SA 4.0協議,如果您需要轉載,請注明本站網址或者原文地址。任何問題請咨詢:yoyou2525@163.com.