I want to include a pre-trained XLNet (or possibly another state of the art transformer) in a model to fine-tune it.
However, it doesn't work when I include it with keras layers.
import tensorflow as tf
from transformers import AutoTokenizer, TFAutoModel
# Reproducer from the question: wrap a pre-trained XLNet in a Keras functional model.
# Single int32 input of length 2000 (presumably token ids — TODO confirm).
inputs = tf.keras.Input(shape=2000, dtype='int32')
x = inputs
# Load the pre-trained "xlnet-base-cased" checkpoint via the TF auto-model class.
xlnetPretrainedModel = TFAutoModel.from_pretrained("xlnet-base-cased")
# NOTE: this is the line the traceback points at:
# AttributeError: 'NoneType' object has no attribute 'shape'
x = xlnetPretrainedModel(x)
# Intended head: global average pooling followed by two Dense(32) layers.
x = tf.keras.layers.GlobalAveragePooling1D()(x)
x = tf.keras.layers.Dense(32, activation='relu')(x)
x = tf.keras.layers.Dense(32, activation=None)(x)
model = tf.keras.Model(inputs=inputs, outputs=x)
# Compiled with the Adam optimizer and a mean-squared-error loss.
model.compile(optimizer='adam',
loss='mean_squared_error')
model.summary()
The bug is
AttributeError: 'NoneType' object has no attribute 'shape'
at the line
x = xlnetPretrainedModel(x)
So when the model is used on the input layer.
The XLNet model works if used on a numpy array, but then I wouldn't be able to train it.
The full error message is:
---------------------------------------------------------------------------
AttributeError Traceback (most recent call last)
<ipython-input-23-d543506f9697> in <module>
5 x = inputs
6 xlnetPretrainedModel = TFAutoModel.from_pretrained("xlnet-base-cased")
----> 7 x = xlnetPretrainedModel(x)
8 x = tf.keras.layers.GlobalAveragePooling1D()(x)
9 x = tf.keras.layers.Dense(32, activation='relu')(x)
/opt/conda/lib/python3.7/site-packages/tensorflow_core/python/keras/engine/base_layer.py in __call__(self, inputs, *args, **kwargs)
771 not base_layer_utils.is_in_eager_or_tf_function()):
772 with auto_control_deps.AutomaticControlDependencies() as acd:
--> 773 outputs = call_fn(cast_inputs, *args, **kwargs)
774 # Wrap Tensors in `outputs` in `tf.identity` to avoid
775 # circular dependencies.
/opt/conda/lib/python3.7/site-packages/tensorflow_core/python/autograph/impl/api.py in wrapper(*args, **kwargs)
235 except Exception as e: # pylint:disable=broad-except
236 if hasattr(e, 'ag_error_metadata'):
--> 237 raise e.ag_error_metadata.to_exception(e)
238 else:
239 raise
AttributeError: in converted code:
/opt/conda/lib/python3.7/site-packages/transformers/modeling_tf_xlnet.py:810 call *
outputs = self.transformer(inputs, **kwargs)
/opt/conda/lib/python3.7/site-packages/tensorflow_core/python/keras/engine/base_layer.py:805 __call__
inputs, outputs, args, kwargs)
/opt/conda/lib/python3.7/site-packages/tensorflow_core/python/keras/engine/base_layer.py:2014 _set_connectivity_metadata_
input_tensors=inputs, output_tensors=outputs, arguments=arguments)
/opt/conda/lib/python3.7/site-packages/tensorflow_core/python/keras/engine/base_layer.py:2044 _add_inbound_node
arguments=arguments)
/opt/conda/lib/python3.7/site-packages/tensorflow_core/python/keras/engine/node.py:110 __init__
self.output_shapes = nest.map_structure(backend.int_shape, output_tensors)
/opt/conda/lib/python3.7/site-packages/tensorflow_core/python/util/nest.py:568 map_structure
structure[0], [func(*x) for x in entries],
/opt/conda/lib/python3.7/site-packages/tensorflow_core/python/util/nest.py:568 <listcomp>
structure[0], [func(*x) for x in entries],
/opt/conda/lib/python3.7/site-packages/tensorflow_core/python/keras/backend.py:1172 int_shape
shape = x.shape
AttributeError: 'NoneType' object has no attribute 'shape'
or, after trying the solution presented at https://github.com/huggingface/transformers/issues/1350 (decorating the call with tf.function):
---------------------------------------------------------------------------
AttributeError Traceback (most recent call last)
<ipython-input-16-c852fba5aa15> in <module>
8 xlnetPretrainedModel = TFAutoModel.from_pretrained("xlnet-base-cased")
9 xlnetPretrainedModel.call = tf.function(xlnetPretrainedModel.transformer.call)
---> 10 x = xlnetPretrainedModel(x)
11
12 x = tf.keras.layers.GlobalAveragePooling1D()(x)
/opt/conda/lib/python3.7/site-packages/tensorflow_core/python/keras/engine/base_layer.py in __call__(self, inputs, *args, **kwargs)
803 kwargs.pop('mask')
804 inputs, outputs = self._set_connectivity_metadata_(
--> 805 inputs, outputs, args, kwargs)
806 self._handle_activity_regularization(inputs, outputs)
807 self._set_mask_metadata(inputs, outputs, input_masks)
/opt/conda/lib/python3.7/site-packages/tensorflow_core/python/keras/engine/base_layer.py in _set_connectivity_metadata_(self, inputs, outputs, args, kwargs)
2012 # This updates the layer history of the output tensor(s).
2013 self._add_inbound_node(
-> 2014 input_tensors=inputs, output_tensors=outputs, arguments=arguments)
2015 return inputs, outputs
2016
/opt/conda/lib/python3.7/site-packages/tensorflow_core/python/keras/engine/base_layer.py in _add_inbound_node(self, input_tensors, output_tensors, arguments)
2042 input_tensors=input_tensors,
2043 output_tensors=output_tensors,
-> 2044 arguments=arguments)
2045
2046 # Update tensor history metadata.
/opt/conda/lib/python3.7/site-packages/tensorflow_core/python/keras/engine/node.py in __init__(self, outbound_layer, inbound_layers, node_indices, tensor_indices, input_tensors, output_tensors, arguments)
108 self.input_shapes = nest.map_structure(backend.int_shape, input_tensors)
109 # Nested structure of shape tuples, shapes of output_tensors.
--> 110 self.output_shapes = nest.map_structure(backend.int_shape, output_tensors)
111
112 # Optional keyword arguments to layer's `call`.
/opt/conda/lib/python3.7/site-packages/tensorflow_core/python/util/nest.py in map_structure(func, *structure, **kwargs)
566
567 return pack_sequence_as(
--> 568 structure[0], [func(*x) for x in entries],
569 expand_composites=expand_composites)
570
/opt/conda/lib/python3.7/site-packages/tensorflow_core/python/util/nest.py in <listcomp>(.0)
566
567 return pack_sequence_as(
--> 568 structure[0], [func(*x) for x in entries],
569 expand_composites=expand_composites)
570
/opt/conda/lib/python3.7/site-packages/tensorflow_core/python/keras/backend.py in int_shape(x)
1170 """
1171 try:
-> 1172 shape = x.shape
1173 if not isinstance(shape, tuple):
1174 shape = tuple(shape.as_list())
AttributeError: 'NoneType' object has no attribute 'shape'
Please, can anyone help me fix this error?
Generally the model output differs from model to model in the Huggingface transformers library. Check the documentation for what values are returned from the XLNet's "call()" function.
The last hidden state can generally be accessed in the following way:
# Load the pre-trained XLNet checkpoint via the TF auto-model class.
model = TFAutoModel.from_pretrained("xlnet-base-cased")
# 'last_hidden_state' seems to be common to most of the transformer models
# refer: https://huggingface.co/transformers/model_doc/bert.html#tfbertmodel
# `tokenizer_output` stands for the encoded inputs produced by the matching tokenizer.
output = model(tokenizer_output).last_hidden_state
# Feed the extracted hidden states (not the model's full return value) to the head.
x = tf.keras.layers.Dense(32, activation='relu')(output)
# The rest of your model
Making this change should solve your issue.
The XLNet model takes input that has first been passed through the XLNet tokenizer.
Given:
from transformers import XLNetTokenizer
# Load the tokenizer for the same "xlnet-base-cased" checkpoint name used for the model.
xlnet_tokenizer = XLNetTokenizer.from_pretrained("xlnet-base-cased")
For a single forward pass, you can then generate your x as something like:
# Encode one example sentence; return_tensors="tf" requests TensorFlow tensors.
x = xlnet_tokenizer('Hello world', padding=True, return_tensors="tf")
After that, you should be able to proceed with:
# Run the pre-trained model on the tokenizer's output.
xlnet_output = xlnetPretrainedModel(x)
To use x = xlnetPretrainedModel(x) in the functional API definition of your model, the input x needs to be in a format identical to the output of xlnet_tokenizer, which is: {'input_ids': <Tensor>, 'token_type_ids': <Tensor>, 'attention_mask': <Tensor>}.
You can do that with the functional API like:
# One Keras Input per entry of the tokenizer's output dict; `max_length` is the
# fixed sequence length chosen by the caller.
input_ids = tf.keras.layers.Input(shape=(max_length,), dtype=tf.int32, name="input_ids")
token_type_ids = tf.keras.layers.Input(shape=(max_length,), dtype=tf.int32, name="token_type_ids")
# Bound as `attention_mask` so it matches the name used in the dict below
# (the original bound `attention_masks`, which raised NameError at the call).
attention_mask = tf.keras.layers.Input(shape=(max_length,), dtype=tf.int32, name="attention_mask")
# Pass the inputs as a dict with the same keys the tokenizer emits.
x = xlnetPretrainedModel({
    'input_ids': input_ids,
    'token_type_ids': token_type_ids,
    'attention_mask': attention_mask})
...
You should be able to proceed like that without any more of such problems.
The technical posts on this site follow the CC BY-SA 4.0 license. If you need to reprint, please indicate the site URL or the original address. For any questions, please contact: yoyou2525@163.com.