
Custom attention layer after LSTM layer gives ValueError in Keras

I was trying to use Keras to build a customized attention block after an LSTM and got an error. Without the attention block the code runs fine. The input code is below; I have omitted some irrelevant parts.

import tensorflow as tf
import pandas as pd
import os
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.layers import Layer
import numpy as np
from sklearn.model_selection import train_test_split
from nltk.tokenize import word_tokenize
from tensorflow.keras.preprocessing.text import one_hot
from tensorflow.keras.preprocessing.sequence import pad_sequences
import pickle
import random
import time
from tensorflow.keras.callbacks import TensorBoard
from tensorflow.keras import backend as K

class attention(Layer):
    def __init__(self, **kwargs):
        super(attention, self).__init__(**kwargs)

    def build(self, input_shape):
        self.W = self.add_weight(shape=(input_shape[-1], 1),
                                 initializer='random_normal', trainable=True)
        self.b = self.add_weight(shape=(input_shape[1], 1),
                                 initializer='zeros', trainable=True)
        super(attention, self).build(input_shape)

    def call(self, x):
        # Alignment scores. Pass them through tanh function
        e = K.tanh(K.dot(x, self.W) + self.b)
        # Remove dimension of size 1
        e = K.squeeze(e, axis=-1)
        # Compute the weights
        alpha = K.softmax(e)
        # Reshape to TensorFlow format
        alpha = K.expand_dims(alpha, axis=-1)
        # Compute the context vector
        context = x * alpha
        context = K.sum(context, axis=1)
        return context

Input_rnn = keras.Input(shape=(None, 1))


LSTM_1 = layers.LSTM(32, activation='relu', return_sequences=True)(Input_rnn)
Dropout_1 = layers.Dropout(0.2)(LSTM_1)
LSTM_2 = layers.LSTM(32, activation='relu', return_sequences=True)(Dropout_1)
Dropout_2 = layers.Dropout(0.2)(LSTM_2)
LSTM_3 = layers.LSTM(32, activation='relu', return_sequences=True)(Dropout_2)
Dropout_3 = layers.Dropout(0.2)(LSTM_3)
attention_layer = attention()(Dropout_3)
Dense_1 = layers.Dense(64, activation='relu')(attention_layer)
Dense_2 = layers.Dense(16, activation='relu')(Dense_1)
Dense_3 = layers.Dense(8, activation='relu')(Dense_2)
Dense_4 = layers.Dense(1, activation='sigmoid')(Dense_3)

The error is:

2021-11-13 21:06:12.520715: W tensorflow/stream_executor/platform/default/dso_loader.cc:60] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /usr/local/cuda-11.0/lib64:/usr/local/cuda-11.0/extras/CUPTI/lib64:/usr/local/cudnn8.0-11.0/lib64:
2021-11-13 21:06:12.520735: I tensorflow/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.
2021-11-13 21:06:18.627597: I tensorflow/compiler/jit/xla_cpu_device.cc:41] Not creating XLA devices, tf_xla_enable_xla_devices not set
2021-11-13 21:06:18.627719: W tensorflow/stream_executor/platform/default/dso_loader.cc:60] Could not load dynamic library 'libcuda.so.1'; dlerror: libcuda.so.1: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /usr/local/cuda-11.0/lib64:/usr/local/cuda-11.0/extras/CUPTI/lib64:/usr/local/cudnn8.0-11.0/lib64:
2021-11-13 21:06:18.627731: W tensorflow/stream_executor/cuda/cuda_driver.cc:326] failed call to cuInit: UNKNOWN ERROR (303)
2021-11-13 21:06:18.627746: I tensorflow/stream_executor/cuda/cuda_diagnostics.cc:156] kernel driver does not appear to be running on this host (janus0.ihpc.uts.edu.au): /proc/driver/nvidia/version does not exist
2021-11-13 21:06:18.629462: I tensorflow/compiler/jit/xla_gpu_device.cc:99] Not creating XLA devices, tf_xla_enable_xla_devices not set
Traceback (most recent call last):
  File "/home/tialan/tf/lib/python3.7/site-packages/tensorflow/python/ops/array_ops.py", line 2874, in zeros
    tensor_shape.TensorShape(shape))
  File "/home/tialan/tf/lib/python3.7/site-packages/tensorflow/python/framework/constant_op.py", line 356, in _tensor_shape_tensor_conversion_function
    "Cannot convert a partially known TensorShape to a Tensor: %s" % s)
ValueError: Cannot convert a partially known TensorShape to a Tensor: (None, 1)

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "code/keras_fun.py", line 127, in <module>
    attention_layer = attention()(Dropout_3)
  File "/home/tialan/tf/lib/python3.7/site-packages/tensorflow/python/keras/engine/base_layer.py", line 952, in __call__
    input_list)
  File "/home/tialan/tf/lib/python3.7/site-packages/tensorflow/python/keras/engine/base_layer.py", line 1091, in _functional_construction_call
    inputs, input_masks, args, kwargs)
  File "/home/tialan/tf/lib/python3.7/site-packages/tensorflow/python/keras/engine/base_layer.py", line 822, in _keras_tensor_symbolic_call
    return self._infer_output_signature(inputs, args, kwargs, input_masks)
  File "/home/tialan/tf/lib/python3.7/site-packages/tensorflow/python/keras/engine/base_layer.py", line 862, in _infer_output_signature
    self._maybe_build(inputs)
  File "/home/tialan/tf/lib/python3.7/site-packages/tensorflow/python/keras/engine/base_layer.py", line 2710, in _maybe_build
    self.build(input_shapes)  # pylint:disable=not-callable
  File "code/keras_fun.py", line 34, in build
    initializer='zeros', trainable=True)
  File "/home/tialan/tf/lib/python3.7/site-packages/tensorflow/python/keras/engine/base_layer.py", line 639, in add_weight
    caching_device=caching_device)
  File "/home/tialan/tf/lib/python3.7/site-packages/tensorflow/python/training/tracking/base.py", line 810, in _add_variable_with_custom_getter
    **kwargs_for_getter)
  File "/home/tialan/tf/lib/python3.7/site-packages/tensorflow/python/keras/engine/base_layer_utils.py", line 142, in make_variable
    shape=variable_shape if variable_shape else None)
  File "/home/tialan/tf/lib/python3.7/site-packages/tensorflow/python/ops/variables.py", line 260, in __call__
    return cls._variable_v1_call(*args, **kwargs)
  File "/home/tialan/tf/lib/python3.7/site-packages/tensorflow/python/ops/variables.py", line 221, in _variable_v1_call
    shape=shape)
  File "/home/tialan/tf/lib/python3.7/site-packages/tensorflow/python/ops/variables.py", line 199, in <lambda>
    previous_getter = lambda **kwargs: default_variable_creator(None, **kwargs)
  File "/home/tialan/tf/lib/python3.7/site-packages/tensorflow/python/ops/variable_scope.py", line 2618, in default_variable_creator
    shape=shape)
  File "/home/tialan/tf/lib/python3.7/site-packages/tensorflow/python/ops/variables.py", line 264, in __call__
    return super(VariableMetaclass, cls).__call__(*args, **kwargs)
  File "/home/tialan/tf/lib/python3.7/site-packages/tensorflow/python/ops/resource_variable_ops.py", line 1585, in __init__
    distribute_strategy=distribute_strategy)
  File "/home/tialan/tf/lib/python3.7/site-packages/tensorflow/python/ops/resource_variable_ops.py", line 1712, in _init_from_args
    initial_value = initial_value()
  File "/home/tialan/tf/lib/python3.7/site-packages/tensorflow/python/keras/initializers/initializers_v2.py", line 139, in __call__
    return super(Zeros, self).__call__(shape, dtype=_get_dtype(dtype), **kwargs)
  File "/home/tialan/tf/lib/python3.7/site-packages/tensorflow/python/ops/init_ops_v2.py", line 154, in __call__
    return array_ops.zeros(shape, dtype)
  File "/home/tialan/tf/lib/python3.7/site-packages/tensorflow/python/util/dispatch.py", line 201, in wrapper
    return target(*args, **kwargs)
  File "/home/tialan/tf/lib/python3.7/site-packages/tensorflow/python/ops/array_ops.py", line 2819, in wrapped
    tensor = fun(*args, **kwargs)
  File "/home/tialan/tf/lib/python3.7/site-packages/tensorflow/python/ops/array_ops.py", line 2877, in zeros
    shape = ops.convert_to_tensor(shape, dtype=dtypes.int32)
  File "/home/tialan/tf/lib/python3.7/site-packages/tensorflow/python/profiler/trace.py", line 163, in wrapped
    return func(*args, **kwargs)
  File "/home/tialan/tf/lib/python3.7/site-packages/tensorflow/python/framework/ops.py", line 1540, in convert_to_tensor
    ret = conversion_func(value, dtype=dtype, name=name, as_ref=as_ref)
  File "/home/tialan/tf/lib/python3.7/site-packages/tensorflow/python/framework/constant_op.py", line 339, in _constant_tensor_conversion_function
    return constant(v, dtype=dtype, name=name)
  File "/home/tialan/tf/lib/python3.7/site-packages/tensorflow/python/framework/constant_op.py", line 265, in constant
    allow_broadcast=True)
  File "/home/tialan/tf/lib/python3.7/site-packages/tensorflow/python/framework/constant_op.py", line 276, in _constant_impl
    return _constant_eager_impl(ctx, value, dtype, shape, verify_shape)
  File "/home/tialan/tf/lib/python3.7/site-packages/tensorflow/python/framework/constant_op.py", line 301, in _constant_eager_impl
    t = convert_to_eager_tensor(value, ctx, dtype)
  File "/home/tialan/tf/lib/python3.7/site-packages/tensorflow/python/framework/constant_op.py", line 98, in convert_to_eager_tensor
    return ops.EagerTensor(value, ctx.device_name, dtype)
ValueError: Attempt to convert a value (None) with an unsupported type (<class 'NoneType'>) to a Tensor.

I can't find where it is going wrong. I am not sure if it has something to do with the input shape difference from the attention layer to the dense layer, or from the dropout layer to the attention layer.

Which TensorFlow version are you using? I can see there was some discrepancy between tf.keras and tf.keras.layers. I was able to run the above code with a few changes to avoid the error, using Tensorflow==2.3.0. The ValueError comes from the bias weight: self.b is created with shape=(input_shape[1], 1), but with Input(shape=(None, 1)) the time dimension input_shape[1] is None, so the zeros initializer is asked to build a tensor from a partially known shape (hence "Cannot convert a partially known TensorShape to a Tensor: (None, 1)"). Giving the bias a fixed shape of (1,), which broadcasts over the time axis, avoids the error.
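As a quick check (a minimal sketch of my own, not from the original post), you can confirm that the time dimension is unknown when build() runs:

import tensorflow as tf

inp = tf.keras.Input(shape=(None, 1))  # (batch, time, features)
print(inp.shape)  # (None, None, 1) -- so input_shape[1] is None inside build()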

Please find the modified code below:

import tensorflow as tf
import pandas as pd
import os
from tensorflow import keras
#from tensorflow.keras import layers
from tensorflow.keras.layers import Layer
import numpy as np
from sklearn.model_selection import train_test_split
from nltk.tokenize import word_tokenize
from tensorflow.keras.preprocessing.text import one_hot
from tensorflow.keras.preprocessing.sequence import pad_sequences
import pickle
import random
import time
from tensorflow.keras.callbacks import TensorBoard
#from tensorflow.keras import backend as K

class attention(tf.keras.layers.Layer):
    def __init__(self, **kwargs):
        super(attention, self).__init__(**kwargs)

    def build(self, input_shape):
        w_init = tf.random_normal_initializer()
        self.W = tf.Variable(initial_value=w_init(shape=(input_shape[-1], 1), dtype="float32"), trainable=True)
        b_init = tf.zeros_initializer()
        self.b = tf.Variable(initial_value=b_init(shape=(1,), dtype="float32"), trainable=True)
        super(attention, self).build(input_shape)

        # Original (failing) weight creation, kept for reference:
        # self.W = self.add_weight(shape=(input_shape[-1], 1), initializer='random_normal', trainable=True)
        # self.b = self.add_weight(shape=(input_shape[1], 1), initializer='zeros', trainable=True)

    def call(self, x):
        # Alignment scores. Pass them through tanh function
        e = tf.tanh(tf.matmul(x, self.W) + self.b)
        # Remove dimension of size 1
        e = tf.squeeze(e, axis=-1)
        # Compute the weights
        alpha = tf.keras.activations.softmax(e)
        # Reshape to TensorFlow format
        alpha = tf.expand_dims(alpha, axis=-1)
        # Compute the context vector
        context = x * alpha
        context = tf.math.reduce_sum(context, axis=1)
        return context

Input_rnn = tf.keras.Input(shape=(None, 1))


LSTM_1 = tf.keras.layers.LSTM(32, activation='relu', return_sequences=True)(Input_rnn)
Dropout_1 = tf.keras.layers.Dropout(0.2)(LSTM_1)
LSTM_2 = tf.keras.layers.LSTM(32, activation='relu', return_sequences=True)(Dropout_1)
Dropout_2 = tf.keras.layers.Dropout(0.2)(LSTM_2)
LSTM_3 = tf.keras.layers.LSTM(32, activation='relu', return_sequences=True)(Dropout_2)
Dropout_3 = tf.keras.layers.Dropout(0.2)(LSTM_3)

attention_layer = attention()(Dropout_3)

Dense_1 = tf.keras.layers.Dense(64, activation='relu')(attention_layer)
Dense_2 = tf.keras.layers.Dense(16, activation='relu')(Dense_1)
Dense_3 = tf.keras.layers.Dense(8, activation='relu')(Dense_2)
Dense_4 = tf.keras.layers.Dense(1, activation='sigmoid')(Dense_3)
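
To sanity-check the fixed layer end to end, here is a short usage sketch of my own (the model construction and the random dummy data are assumptions, not part of the original answer):

model = tf.keras.Model(inputs=Input_rnn, outputs=Dense_4)
model.compile(optimizer='adam', loss='binary_crossentropy')
model.summary()

# Smoke test on random data: 8 sequences of length 20 with 1 feature each.
x_dummy = np.random.random((8, 20, 1)).astype("float32")
y_dummy = np.random.randint(0, 2, size=(8, 1)).astype("float32")
model.fit(x_dummy, y_dummy, epochs=1, batch_size=4, verbose=0)
print(model.predict(x_dummy).shape)  # (8, 1)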
