保存並加載 keras 子類模型

[英]Save and load keras subclassed models

我正在嘗試保存並加載 TF 圖像字幕教程中的 CNN 編碼器和 RNN 解碼器: https://www.tensorflow.org/tutorials/text/image_captioning 。由於它們是 Keras Model 的子類,而不是函數式(Functional)或順序(Sequential)模型,所以我不能直接使用 model.save 和 model.load。

相反,我不得不使用 model.save_weights 和 model.load_weights。問題是 model.load_weights 只能在 model.build 之後才能調用,而 model.build 需要 input_shape 參數,它必須是元組而不是列表。然而,我們的 RNN 解碼器有多個輸入,而 Keras 文檔指出無法對多個輸入調用 model.build。

有沒有其他方法可以加載模型?

最終我想要一個更小的 python 腳本,它可以加載 model 權重並進行推理。 該腳本不應該訓練。

Colab: https://colab.research.google.com/drive/12YtCH2X0pwIBBXPW0TXmeA520MyVv9AF

這是我設法解決該問題的方法。 不是一個很好的解決方案,但有效! 首先將每個權重矩陣保存在.npy文件中:

import os

# np.save does not create missing parent directories, so make sure the
# target folder exists before writing any weight file.
os.makedirs("encoder_layer_weights", exist_ok=True)

# Dump every weight of every encoder layer into its own .npy file. The file
# name encodes the layer index, the layer name and the weight's position
# inside layer.weights, which is what the loading code relies on later.
for i, layer in enumerate(encoder.layers):
    print("Layer %s" % i, layer.name)
    for j, w in enumerate(layer.weights):
        np.save("encoder_layer_weights/layer_%s_%s_weights_%s.npy" % (i, layer.name, j), w.numpy())

import os

# np.save does not create missing parent directories, so make sure the
# target folder exists before writing any weight file.
os.makedirs("decoder_layer_weights", exist_ok=True)

# Dump every weight of every decoder layer into its own .npy file. The file
# name encodes the layer index, the layer name and the weight's position
# inside layer.weights, which is what the loading code relies on later.
for i, layer in enumerate(decoder.layers):
    print("Layer %s" % i, layer.name)
    for j, w in enumerate(layer.weights):
        np.save("decoder_layer_weights/layer_%s_%s_weights_%s.npy" % (i, layer.name, j), w.numpy())

然後重新創建子類模型,但這次為每一層中的每個權重都指定一個初始化器(initializer)。這一步必須小心完成,因為如果形狀不匹配,模型將無法構建。

class CNN_Encoder(tf.keras.Model):
    """Encoder that projects pre-extracted image features through one dense layer.

    The features are expected to have been extracted beforehand, so this model
    only applies a fully connected layer followed by ReLU. The initial kernel
    and bias of that layer are read from ``.npy`` files under
    ``encoder_layer_weights/`` and injected through constant initializers.
    """

    def __init__(self, embedding_dim):
        """Create the dense layer with its saved kernel and bias.

        Args:
            embedding_dim: Number of output units of the dense layer; it must
                agree with the shapes of the saved arrays.
        """
        super().__init__()
        weight_path = "encoder_layer_weights/layer_%s_%s_weights_%s.npy"
        # Weight 0 is used as the kernel and weight 1 as the bias.
        kernel = np.load(weight_path % (0, "dense", 0))
        bias = np.load(weight_path % (0, "dense", 1))

        constant = tf.keras.initializers.Constant
        # Per the original author's note, the output of this layer has
        # shape (batch_size, 64, embedding_dim).
        self.fc = tf.keras.layers.Dense(
            embedding_dim,
            kernel_initializer=constant(kernel),
            bias_initializer=constant(bias),
        )

    def call(self, x):
        """Return ``relu(fc(x))`` for the given feature tensor."""
        projected = self.fc(x)
        return tf.nn.relu(projected)

class BahdanauAttention(tf.keras.Model):
    """Additive attention over encoder features, initialised from saved weights.

    Three dense layers (``W1``, ``W2``, ``V``) are built, each with its kernel
    and bias read from ``.npy`` files under ``decoder_layer_weights/`` and
    injected through constant initializers.
    """

    def __init__(self, units):
        """Create the three dense layers with their saved kernels and biases.

        Args:
            units: Output size of ``W1`` and ``W2``; it must agree with the
                shapes of the saved arrays.
        """
        super().__init__()
        weight_path = "decoder_layer_weights/layer_%s_%s_weights_%s.npy"
        saved = [
            np.load(weight_path % (4, "bahdanau_attention", idx))
            for idx in range(6)
        ]
        # The six arrays are consumed as (kernel, bias) pairs for W1, W2 and V.
        w1_kernel, w1_bias, w2_kernel, w2_bias, v_kernel, v_bias = saved

        constant = tf.keras.initializers.Constant
        self.W1 = tf.keras.layers.Dense(
            units,
            kernel_initializer=constant(w1_kernel),
            bias_initializer=constant(w1_bias),
        )
        self.W2 = tf.keras.layers.Dense(
            units,
            kernel_initializer=constant(w2_kernel),
            bias_initializer=constant(w2_bias),
        )
        self.V = tf.keras.layers.Dense(
            1,
            kernel_initializer=constant(v_kernel),
            bias_initializer=constant(v_bias),
        )

    def call(self, features, hidden):
        """Compute the attention context vector and the attention weights.

        Args:
            features: Encoder output; per the original author's notes its
                shape is (batch_size, 64, embedding_dim).
            hidden: Decoder hidden state; per the original author's notes its
                shape is (batch_size, hidden_size).

        Returns:
            A ``(context_vector, attention_weights)`` tuple. The weights are a
            softmax over axis 1 of ``V(score)``; the context vector is the
            weighted sum of ``features`` over axis 1.
        """
        # Insert a length-1 axis so the hidden state broadcasts against the
        # per-location features: (batch_size, 1, hidden_size).
        hidden_with_time_axis = tf.expand_dims(hidden, 1)

        projected_features = self.W1(features)
        projected_hidden = self.W2(hidden_with_time_axis)
        score = tf.nn.tanh(projected_features + projected_hidden)

        # V maps each location's score to a single value, so the weights
        # end with a trailing dimension of 1; softmax normalises over axis 1.
        attention_weights = tf.nn.softmax(self.V(score), axis=1)

        # Weighted sum over axis 1; the last dimension of the result is the
        # last dimension of `features`.
        context_vector = tf.reduce_sum(attention_weights * features, axis=1)

        return context_vector, attention_weights

class RNN_Decoder(tf.keras.Model):
    """GRU decoder with attention whose layers are initialised from saved weights.

    Every layer (embedding, GRU, two dense layers) receives its initial
    weights from ``.npy`` files under ``decoder_layer_weights/`` through
    constant initializers. The attention sub-model loads its own weights.
    """

    def __init__(self, embedding_dim, units, vocab_size):
        """Build all decoder layers from the saved weight files.

        Args:
            embedding_dim: Output dimension of the embedding layer.
            units: Number of GRU units, also used as the width of ``fc1``.
            vocab_size: Size of the vocabulary (embedding input dimension
                and width of ``fc2``).

        All three values must agree with the shapes of the saved arrays.
        """
        super(RNN_Decoder, self).__init__()
        self.units = units

        C = tf.keras.initializers.Constant
        w_emb = np.load("decoder_layer_weights/layer_%s_%s_weights_%s.npy" % (0, "embedding", 0))
        w_gru_1, w_gru_2, w_gru_3 = [
            np.load("decoder_layer_weights/layer_%s_%s_weights_%s.npy" % (1, "gru", j))
            for j in range(3)
        ]
        w1, w2 = [
            np.load("decoder_layer_weights/layer_%s_%s_weights_%s.npy" % (2, "dense_1", j))
            for j in range(2)
        ]
        w3, w4 = [
            np.load("decoder_layer_weights/layer_%s_%s_weights_%s.npy" % (3, "dense_2", j))
            for j in range(2)
        ]

        self.embedding = tf.keras.layers.Embedding(vocab_size, embedding_dim, embeddings_initializer=C(w_emb))
        # return_sequences / return_state are required because call() unpacks
        # `output, state` from the GRU and feeds the full sequence to fc1.
        # NOTE(review): the three saved GRU arrays are assumed to be in the
        # layer.weights order kernel, recurrent kernel, bias - confirm by
        # checking their shapes.
        self.gru = tf.keras.layers.GRU(
            self.units,
            return_sequences=True,
            return_state=True,
            kernel_initializer=C(w_gru_1),
            recurrent_initializer=C(w_gru_2),
            bias_initializer=C(w_gru_3),
        )
        self.fc1 = tf.keras.layers.Dense(self.units, kernel_initializer=C(w1), bias_initializer=C(w2))
        self.fc2 = tf.keras.layers.Dense(vocab_size, kernel_initializer=C(w3), bias_initializer=C(w4))

        self.attention = BahdanauAttention(self.units)

    def call(self, x, features, hidden):
        """Run one decoding step.

        Args:
            x: Input token ids for this step.
            features: Encoder output that the attention attends over.
            hidden: Previous decoder hidden state, used only by the attention.

        Returns:
            A ``(x, state, attention_weights)`` tuple: the output of ``fc2``
            on the flattened GRU output, the GRU's returned state, and the
            attention weights.
        """
        # Attention is defined as a separate model.
        context_vector, attention_weights = self.attention(features, hidden)

        # x shape after passing through embedding == (batch_size, 1, embedding_dim)
        x = self.embedding(x)

        # Prepend the context vector along the last axis of the embedded input.
        x = tf.concat([tf.expand_dims(context_vector, 1), x], axis=-1)

        # Pass the concatenated vector to the GRU.
        output, state = self.gru(x)

        # shape == (batch_size, max_length, hidden_size)
        x = self.fc1(output)

        # Collapse batch and time axes: (batch_size * max_length, hidden_size)
        x = tf.reshape(x, (-1, x.shape[2]))

        # output shape == (batch_size * max_length, vocab)
        x = self.fc2(x)

        return x, state, attention_weights

    def reset_state(self, batch_size):
        """Return an all-zero hidden state of shape (batch_size, units)."""
        return tf.zeros((batch_size, self.units))


# Re-create both models; each constructor reads its saved weights from the
# .npy files on disk, so the hyper-parameters must match the saved shapes.
encoder = CNN_Encoder(embedding_dim=embedding_dim)
decoder = RNN_Decoder(
    embedding_dim=embedding_dim,
    units=units,
    vocab_size=vocab_size,
)


