How to save and load Python Keras BERT model to serialize it?

Question

I've just finished training my Keras BERT model, which performs multilabel text classification (with outputs expressed as percentages), and I would like to be able to apply my trained model to new (unlabeled) text.

Here are the main parts of my model:

   class BertLayer(tf.keras.layers.Layer):
    """Keras layer that wraps a TF-Hub BERT module and emits one pooled vector per example.

    The hub module is loaded in ``build`` from ``bert_path``; ``call`` feeds it
    ``input_ids`` / ``input_mask`` / ``segment_ids`` and returns either the
    module's ``pooled_output`` (``pooling="first"``) or a mask-weighted mean of
    its ``sequence_output`` (``pooling="mean"``).
    """
    def __init__(
        self,
        n_fine_tune_layers=10,
        pooling="first",
        bert_path="https://tfhub.dev/google/bert_uncased_L-12_H-768_A-12/1", #This is the model we choose
        **kwargs,
    ):
        """Store the layer settings and validate ``pooling``.

        Args:
            n_fine_tune_layers: number of ``encoder/layer_*`` groups that
                ``build`` marks as trainable.
            pooling: ``"first"`` or ``"mean"``.
            bert_path: handle passed to ``hub.Module`` in ``build``.
            **kwargs: forwarded unchanged to the base ``Layer`` constructor.

        Raises:
            NameError: if ``pooling`` is neither ``"first"`` nor ``"mean"``.
        """
        self.n_fine_tune_layers = n_fine_tune_layers
        self.trainable = True
        # Hard-coded width reported by compute_output_shape; not a constructor argument.
        self.output_size = 768
        self.pooling = pooling
        self.bert_path = bert_path
        if self.pooling not in ["first", "mean"]:
            raise NameError(
                f"Undefined pooling type (must be either first or mean, but is {self.pooling}"
            )

        super(BertLayer, self).__init__(**kwargs)

    def get_config(self):
        """Return the base layer config extended with this layer's settings."""

        config = super().get_config().copy()
        # NOTE(review): 'output_size' is emitted here but is not a parameter of
        # __init__; when this config is fed back to the constructor it falls
        # into **kwargs and is forwarded to the base Layer constructor --
        # presumably the source of "Keyword argument not understood" on load.
        config.update({
            'n_fine_tune_layers': self.n_fine_tune_layers,
            'trainable': self.trainable,
            'output_size': self.output_size,
            'pooling': self.pooling,
            'bert_path': self.bert_path,
        })
        return config
    

    def build(self, input_shape):
        """Load the hub module and split its variables into trainable / non-trainable.

        Raises:
            NameError: if ``self.pooling`` is neither ``"first"`` nor ``"mean"``.
        """
        self.bert = hub.Module(
            self.bert_path, trainable=self.trainable, name=f"{self.name}_module"
        )

        # Remove unused layers
        trainable_vars = self.bert.variables
        if self.pooling == "first":
            # Drop variables whose name contains "/cls/"; keep the pooler dense layer trainable.
            trainable_vars = [var for var in trainable_vars if not "/cls/" in var.name]
            trainable_layers = ["pooler/dense"]

        elif self.pooling == "mean":
            # Mean pooling does not read pooled_output, so "/pooler/" variables are dropped as well.
            trainable_vars = [
                var
                for var in trainable_vars
                if not "/cls/" in var.name and not "/pooler/" in var.name
            ]
            trainable_layers = []
        else:
            raise NameError(
                f"Undefined pooling type (must be either first or mean, but is {self.pooling}"
            )

        # Select how many layers to fine tune
        # Counts down from encoder/layer_11; assumes a 12-layer encoder
        # (as in the default bert_path) -- TODO confirm for other modules.
        for i in range(self.n_fine_tune_layers):
            trainable_layers.append(f"encoder/layer_{str(11 - i)}")

        # Update trainable vars to contain only the specified layers
        trainable_vars = [
            var
            for var in trainable_vars
            if any([l in var.name for l in trainable_layers])
        ]

        # Add to trainable weights
        for var in trainable_vars:
            self._trainable_weights.append(var)

        # Every other module variable is registered as non-trainable.
        for var in self.bert.variables:
            if var not in self._trainable_weights:
                self._non_trainable_weights.append(var)

        super(BertLayer, self).build(input_shape)

    def call(self, inputs):
        """Run the hub module on ``inputs`` and return the pooled representation.

        Args:
            inputs: sequence of three tensors unpacked as
                ``(input_ids, input_mask, segment_ids)``; each is cast to int32.

        Returns:
            ``pooled_output`` of the module for ``"first"`` pooling, or the
            mask-weighted mean of ``sequence_output`` for ``"mean"`` pooling.

        Raises:
            NameError: if ``self.pooling`` is neither ``"first"`` nor ``"mean"``.
        """
        inputs = [K.cast(x, dtype="int32") for x in inputs]
        input_ids, input_mask, segment_ids = inputs
        bert_inputs = dict(
            input_ids=input_ids, input_mask=input_mask, segment_ids=segment_ids
        )
        if self.pooling == "first":
            pooled = self.bert(inputs=bert_inputs, signature="tokens", as_dict=True)[
                "pooled_output"
            ]
        elif self.pooling == "mean":
            result = self.bert(inputs=bert_inputs, signature="tokens", as_dict=True)[
                "sequence_output"
            ]

            # Zero out masked positions, sum over axis 1, and divide by the mask
            # total; the 1e-10 term presumably guards against division by zero.
            mul_mask = lambda x, m: x * tf.expand_dims(m, axis=-1)
            masked_reduce_mean = lambda x, m: tf.reduce_sum(mul_mask(x, m), axis=1) / (
                    tf.reduce_sum(m, axis=1, keepdims=True) + 1e-10)
            input_mask = tf.cast(input_mask, tf.float32)
            pooled = masked_reduce_mean(result, input_mask)
        else:
            raise NameError(f"Undefined pooling type (must be either first or mean, but is {self.pooling}")

        return pooled

    def compute_output_shape(self, input_shape):
        """Return ``(input_shape[0], self.output_size)``."""
        return (input_shape[0], self.output_size)




   

     # Build model
    def build_model(max_seq_length):
        """Build and compile the BERT-based multilabel classifier.

        Three inputs of length ``max_seq_length`` (``input_ids``,
        ``input_masks``, ``segment_ids``) feed a ``BertLayer`` with "first"
        pooling and 3 fine-tuned encoder layers, followed by a 256-unit ReLU
        layer and a sigmoid output with one unit per entry of the module-level
        ``possible_labels``.

        Args:
            max_seq_length: length of each of the three input sequences.

        Returns:
            The compiled ``tf.keras`` model (binary cross-entropy loss, Adam
            optimizer, accuracy metric). The model summary is printed as a
            side effect.
        """
        in_id = tf.keras.layers.Input(shape=(max_seq_length,), name="input_ids")
        in_mask = tf.keras.layers.Input(shape=(max_seq_length,), name="input_masks")
        in_segment = tf.keras.layers.Input(shape=(max_seq_length,), name="segment_ids")

        # This is the input in list form to be fed to the model
        bert_inputs = [in_id, in_mask, in_segment]

        # Calling the preloaded BERT model we have installed
        bert_output = BertLayer(n_fine_tune_layers=3, pooling="first")(bert_inputs)

        # Attaching our classification head to the BERT output
        dense = tf.keras.layers.Dense(256, activation='relu')(bert_output)
        pred = tf.keras.layers.Dense(len(possible_labels), activation='sigmoid')(dense)

        model = tf.keras.models.Model(inputs=bert_inputs, outputs=pred)
        model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
        model.summary()

        return model


    def initialize_vars(sess):
        """Run the TF1 initializers in ``sess`` and register it as the Keras session.

        Runs the local-variable, global-variable and table initializers, in
        that order, then hands the session to Keras via ``K.set_session``.

        Args:
            sess: the TensorFlow session to initialize and register.
        """
        sess.run(tf.local_variables_initializer())
        sess.run(tf.global_variables_initializer())
        sess.run(tf.tables_initializer())
        K.set_session(sess)


    modll_ = build_model(max_seq_length)

    INFO:tensorflow:Saver not created because there are no variables in the graph to restore
INFO:tensorflow:Saver not created because there are no variables in the graph to restore
WARNING:tensorflow:From /opt/anaconda3/lib/python3.7/site-packages/tensorflow_core/python/ops/resource_variable_ops.py:1630: calling BaseResourceVariable.__init__ (from tensorflow.python.ops.resource_variable_ops) with constraint is deprecated and will be removed in a future version.
Instructions for updating:
If using Keras pass *_constraint arguments to layers.
WARNING:tensorflow:From /opt/anaconda3/lib/python3.7/site-packages/tensorflow_core/python/ops/resource_variable_ops.py:1630: calling BaseResourceVariable.__init__ (from tensorflow.python.ops.resource_variable_ops) with constraint is deprecated and will be removed in a future version.
Instructions for updating:
If using Keras pass *_constraint arguments to layers.
WARNING:tensorflow:From /opt/anaconda3/lib/python3.7/site-packages/tensorflow_core/python/ops/nn_impl.py:183: where (from tensorflow.python.ops.array_ops) is deprecated and will be removed in a future version.
Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where
WARNING:tensorflow:From /opt/anaconda3/lib/python3.7/site-packages/tensorflow_core/python/ops/nn_impl.py:183: where (from tensorflow.python.ops.array_ops) is deprecated and will be removed in a future version.
Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where
Model: "model"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
==================================================================================================
input_ids (InputLayer)          [(None, 200)]        0                                            
__________________________________________________________________________________________________
input_masks (InputLayer)        [(None, 200)]        0                                            
__________________________________________________________________________________________________
segment_ids (InputLayer)        [(None, 200)]        0                                            
__________________________________________________________________________________________________
bert_layer (BertLayer)          (None, 768)          110104890   input_ids[0][0]                  
                                                                 input_masks[0][0]                
                                                                 segment_ids[0][0]                
__________________________________________________________________________________________________
dense (Dense)                   (None, 256)          196864      bert_layer[0][0]                 
__________________________________________________________________________________________________
dense_1 (Dense)                 (None, 17)           4369        dense[0][0]                      
==================================================================================================
Total params: 110,306,123
Trainable params: 22,055,441
Non-trainable params: 88,250,682
__________________________________________________________________________________________________


    # Instantiate variables
initialize_vars(sess)

    train_input_ids.shape, train_input_masks.shape, train_segment_ids.shape, train_labels.shape

    #Defining NBatchLogger for logging details for training
class NBatchLogger(tf.keras.callbacks.Callback):
    """Callback that prints running metrics at multiples of ``display`` samples.

    After each batch the number of samples seen so far is accumulated from
    ``logs['size']``; whenever that running total is an exact multiple of
    ``display`` a line of the form ``seen/total ... - metric: value`` is
    printed.
    """

    def __init__(self, display):
        """Store the reporting interval.

        Args:
            display: a line is printed when the sample count is a multiple
                of this value.
        """
        # Initialise the base Callback state before adding our own.
        super().__init__()
        self.seen = 0
        self.display = display

    def on_batch_end(self, batch, logs=None):
        """Accumulate the batch size and print metrics when the interval is hit.

        Args:
            batch: index of the batch that just finished (unused).
            logs: metrics dict for the batch; ``None`` is treated as empty.
        """
        # Avoid a shared mutable default argument.
        logs = {} if logs is None else logs
        self.seen += logs.get('size', 0)
        if self.seen % self.display != 0:
            return

        parts = []
        for k in self.params['metrics']:
            if k not in logs:
                continue
            val = logs[k]
            # Fixed-point for ordinary magnitudes, scientific for tiny values.
            fmt = ' - %s: %.4f' if abs(val) > 1e-3 else ' - %s: %.4e'
            parts.append(fmt % (k, val))
        metrics_log = ''.join(parts)
        print('{}/{} ... {}'.format(self.seen,
                                    self.params['samples'],
                                    metrics_log))


    out_batch = NBatchLogger(display=1000)

    modll_.fit([train_input_ids, train_input_masks, train_segment_ids], train_labels, batch_size=BATCH_SIZE, epochs=EPOCHS, validation_split=VALIDATION_SPLIT, callbacks=[out_batch])

Right after this code part I've tried:

modll_.save('model.h5') 

#from tensorflow.keras.models import  load_model
model = load_model('model.h5', custom_objects={'BertLayer': BertLayer})

But I've got:

TypeError: ('Keyword argument not understood:', 'output_size')

While researching this error, I read that it is simpler to save only the model weights, so I tried:

modll_.save_weights('bert_weights.h5')

modll_.load_weights("bert_weights.h5")

Once the weights of modll_ were loaded back into the model, I applied it to new text in order to obtain predictions for it:

p_preds = modll_.predict([p_input_ids, p_input_masks, p_segment_ids])

# Collect one dict of percentage strings per prediction row. A fresh dict is
# created for every row: reusing a single dict would make every list entry
# alias the same object, so all entries would show the last row's values.
pred_dict = {}
list_p = []

for row in p_preds:
    pred_dict = {
        'S1': str(row[0] * 100) + ' %',
        'S2': str(row[1] * 100) + ' %',
        'S3': str(row[2] * 100) + ' %',
    }
    list_p.append(pred_dict)

for entry in list_p:
    print(entry, "\n")

But I got very bad classification results (I've tried with many texts), which is surprising because during the training phase I had 0.94 accuracy. I think that I've made mistakes when saving and loading my model. Any ideas about that?

Answer 1

When loading the configuration and reconstructing the layers/models, the config dict returned from get_config will be applied to the constructor. In your case, this is similar to: BertLayer(**config) .

Since you don't have output_size in your constructor, you should either add the argument output_size to __init__ (which is recommended because you shouldn't hard-code the constant), or remove it from get_config.

How to save and load Python Keras BERT model to serialize it?

Question

1 answers

solution1
0 2020-07-25 16:17:09

How to save and load Python Keras BERT model to serialize it?

Question

1 answers

solution1 0 2020-07-25 16:17:09

solution1
0 2020-07-25 16:17:09