I've just finished to train my Keras BERT model which treats about multilabel Text classification (percentage unit) and I would like to be able to applied my train model on new (unlabeled) text.
Here are main parts of my model:
class BertLayer(tf.keras.layers.Layer):
def __init__(
self,
n_fine_tune_layers=10,
pooling="first",
bert_path="https://tfhub.dev/google/bert_uncased_L-12_H-768_A-12/1", #This is the model we choose
**kwargs,
):
self.n_fine_tune_layers = n_fine_tune_layers
self.trainable = True
self.output_size = 768
self.pooling = pooling
self.bert_path = bert_path
if self.pooling not in ["first", "mean"]:
raise NameError(
f"Undefined pooling type (must be either first or mean, but is {self.pooling}"
)
super(BertLayer, self).__init__(**kwargs)
def get_config(self):
config = super().get_config().copy()
config.update({
'n_fine_tune_layers': self.n_fine_tune_layers,
'trainable': self.trainable,
'output_size': self.output_size,
'pooling': self.pooling,
'bert_path': self.bert_path,
})
return config
def build(self, input_shape):
self.bert = hub.Module(
self.bert_path, trainable=self.trainable, name=f"{self.name}_module"
)
# Remove unused layers
trainable_vars = self.bert.variables
if self.pooling == "first":
trainable_vars = [var for var in trainable_vars if not "/cls/" in var.name]
trainable_layers = ["pooler/dense"]
elif self.pooling == "mean":
trainable_vars = [
var
for var in trainable_vars
if not "/cls/" in var.name and not "/pooler/" in var.name
]
trainable_layers = []
else:
raise NameError(
f"Undefined pooling type (must be either first or mean, but is {self.pooling}"
)
# Select how many layers to fine tune
for i in range(self.n_fine_tune_layers):
trainable_layers.append(f"encoder/layer_{str(11 - i)}")
# Update trainable vars to contain only the specified layers
trainable_vars = [
var
for var in trainable_vars
if any([l in var.name for l in trainable_layers])
]
# Add to trainable weights
for var in trainable_vars:
self._trainable_weights.append(var)
for var in self.bert.variables:
if var not in self._trainable_weights:
self._non_trainable_weights.append(var)
super(BertLayer, self).build(input_shape)
def call(self, inputs):
inputs = [K.cast(x, dtype="int32") for x in inputs]
input_ids, input_mask, segment_ids = inputs
bert_inputs = dict(
input_ids=input_ids, input_mask=input_mask, segment_ids=segment_ids
)
if self.pooling == "first":
pooled = self.bert(inputs=bert_inputs, signature="tokens", as_dict=True)[
"pooled_output"
]
elif self.pooling == "mean":
result = self.bert(inputs=bert_inputs, signature="tokens", as_dict=True)[
"sequence_output"
]
mul_mask = lambda x, m: x * tf.expand_dims(m, axis=-1)
masked_reduce_mean = lambda x, m: tf.reduce_sum(mul_mask(x, m), axis=1) / (
tf.reduce_sum(m, axis=1, keepdims=True) + 1e-10)
input_mask = tf.cast(input_mask, tf.float32)
pooled = masked_reduce_mean(result, input_mask)
else:
raise NameError(f"Undefined pooling type (must be either first or mean, but is {self.pooling}")
return pooled
def compute_output_shape(self, input_shape):
return (input_shape[0], self.output_size)
# Build model
def build_model(max_seq_length):
in_id = tf.keras.layers.Input(shape=(max_seq_length,), name="input_ids")
in_mask = tf.keras.layers.Input(shape=(max_seq_length,), name="input_masks")
in_segment = tf.keras.layers.Input(shape=(max_seq_length,), name="segment_ids")
#This is the input in list form to be fed to the model
bert_inputs = [in_id, in_mask, in_segment]
bert_output = BertLayer(n_fine_tune_layers=3, pooling="first")(bert_inputs) #calling the preloaded BERT model we have installed
dense = tf.keras.layers.Dense(256, activation='relu')(bert_output) # Attaching our model output here
pred = tf.keras.layers.Dense(len(possible_labels), activation='sigmoid')(dense)
model = tf.keras.models.Model(inputs=bert_inputs, outputs=pred)
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
model.summary()
return model
def initialize_vars(sess):
sess.run(tf.local_variables_initializer())
sess.run(tf.global_variables_initializer())
sess.run(tf.tables_initializer())
K.set_session(sess)
modll_ = build_model(max_seq_length)
INFO:tensorflow:Saver not created because there are no variables in the graph to restore
INFO:tensorflow:Saver not created because there are no variables in the graph to restore
WARNING:tensorflow:From /opt/anaconda3/lib/python3.7/site-packages/tensorflow_core/python/ops/resource_variable_ops.py:1630: calling BaseResourceVariable.__init__ (from tensorflow.python.ops.resource_variable_ops) with constraint is deprecated and will be removed in a future version.
Instructions for updating:
If using Keras pass *_constraint arguments to layers.
WARNING:tensorflow:From /opt/anaconda3/lib/python3.7/site-packages/tensorflow_core/python/ops/resource_variable_ops.py:1630: calling BaseResourceVariable.__init__ (from tensorflow.python.ops.resource_variable_ops) with constraint is deprecated and will be removed in a future version.
Instructions for updating:
If using Keras pass *_constraint arguments to layers.
WARNING:tensorflow:From /opt/anaconda3/lib/python3.7/site-packages/tensorflow_core/python/ops/nn_impl.py:183: where (from tensorflow.python.ops.array_ops) is deprecated and will be removed in a future version.
Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where
WARNING:tensorflow:From /opt/anaconda3/lib/python3.7/site-packages/tensorflow_core/python/ops/nn_impl.py:183: where (from tensorflow.python.ops.array_ops) is deprecated and will be removed in a future version.
Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where
Model: "model"
__________________________________________________________________________________________________
Layer (type) Output Shape Param # Connected to
==================================================================================================
input_ids (InputLayer) [(None, 200)] 0
__________________________________________________________________________________________________
input_masks (InputLayer) [(None, 200)] 0
__________________________________________________________________________________________________
segment_ids (InputLayer) [(None, 200)] 0
__________________________________________________________________________________________________
bert_layer (BertLayer) (None, 768) 110104890 input_ids[0][0]
input_masks[0][0]
segment_ids[0][0]
__________________________________________________________________________________________________
dense (Dense) (None, 256) 196864 bert_layer[0][0]
__________________________________________________________________________________________________
dense_1 (Dense) (None, 17) 4369 dense[0][0]
==================================================================================================
Total params: 110,306,123
Trainable params: 22,055,441
Non-trainable params: 88,250,682
__________________________________________________________________________________________________
# Instantiate variables
initialize_vars(sess)
train_input_ids.shape, train_input_masks.shape, train_segment_ids.shape, train_labels.shape
#Defining NBatchLogger for logging details for training
class NBatchLogger(tf.keras.callbacks.Callback):
def __init__(self, display):
self.seen = 0
self.display = display
def on_batch_end(self, batch, logs={}):
self.seen += logs.get('size', 0)
if self.seen % self.display == 0:
metrics_log = ''
for k in self.params['metrics']:
if k in logs:
val = logs[k]
if abs(val) > 1e-3:
metrics_log += ' - %s: %.4f' % (k, val)
else:
metrics_log += ' - %s: %.4e' % (k, val)
print('{}/{} ... {}'.format(self.seen,
self.params['samples'],
metrics_log))
out_batch = NBatchLogger(display=1000)
modll_.fit([train_input_ids, train_input_masks, train_segment_ids], train_labels, batch_size=BATCH_SIZE, epochs=EPOCHS, validation_split=VALIDATION_SPLIT, callbacks=[out_batch])
Right after this code part I've tried:
modll_.save('model.h5')
#from tensorflow.keras.models import load_model
model = load_model('model.h5', custom_objects={'BertLayer': BertLayer})
But I've got:
TypeError: ('Keyword argument not understood:', 'output_size')
During my researches for this error I read that it's simple to only save model weights so I tried:
modll_.save_weights('bert_weights.h5')
modll_.load_weights("bert_weights.h5")
Once modll_ weighs is conserved in the model I applied my model to a new text in order to obtain predication for it:
p_preds = modll_.predict([p_input_ids, p_input_masks, p_segment_ids])
pred_dict = {}
list_p = []
for i in p_preds:
pred_dict['S1'] = str((i[0]*100)) + ' %'
pred_dict['S2'] = str(i[1]*100) + ' %'
pred_dict['S3'] = str(i[2]*100) + ' %'
list_p.append(pred_dict)
for i in range(len(list_p)):
print(list_p[i], "\n")
But I got very bad classification (I've tried for many text), which is surprising because during training phase I had 0.94 accuracy. I think that I've done mistakes when I saved and loaded my model. Any idea about that?
When loading the configuration and reconstructing the layers/models, the config
dict returned from get_config
will be applied to the constructor. In your case, this is similar to: BertLayer(**config)
.
Since you don't have output_size
in your constructor, you should either add the argument output_size
to __init__
(which is recommended because you shouldn't hard-coded the constant), or remove it from get_config
.
The technical post webpages of this site follow the CC BY-SA 4.0 protocol. If you need to reprint, please indicate the site URL or the original address.Any question please contact:yoyou2525@163.com.