[英]Save and load keras subclassed models
我正在嘗試從關於圖像字幕的 TF 教程中保存和加載 CNN 編碼器和 RNN 解碼器: https://www.tensorflow.org/tutorials/text/image_captioning 。 由於這些是 Keras 模型的子類,而不是函數式(Functional)或順序(Sequential)模型,所以我不能直接使用 model.save 和 model.load。
相反,我不得不使用model.save_weights
和model.load_weights
。 問題是 model.load_weights 只能在調用 model.build 之后才能調用,而 model.build 需要 input_shape 參數,該參數必須是元組而不是列表。 然而,對於我們的 RNN 解碼器,我們有多個輸入。 Keras 文檔指出,無法使用多個輸入調用 model.build。
有沒有其他方法可以加載 model?
最終我想要一個更小的 python 腳本,它可以加載 model 權重並進行推理。 該腳本不應該訓練。
Colab: https://colab.research.google.com/drive/12YtCH2X0pwIBBXPW0TXmeA520MyVv9AF
這是我設法解決該問題的方法。 不是一個很好的解決方案,但有效! 首先將每個權重矩陣保存在.npy
文件中:
import os


def _dump_layer_weights(model, out_dir):
    """Save every weight of every layer in ``model`` as its own ``.npy`` file.

    Files are written as ``<out_dir>/layer_<i>_<layer name>_weights_<j>.npy``,
    where ``i`` is the layer's index in ``model.layers`` and ``j`` is the
    weight's index in ``layer.weights``. The layer name and each weight's
    shape are printed along the way so the files can be matched up later.

    Args:
        model: object exposing ``layers``; each layer exposes ``name`` and
            ``weights``, and each weight exposes ``shape`` and ``numpy()``.
        out_dir: directory that receives the ``.npy`` files.
    """
    # np.save does not create missing parent directories; without this the
    # very first save fails when the output directory is absent.
    os.makedirs(out_dir, exist_ok=True)
    for i, layer in enumerate(model.layers):
        print("Layer %s" % i, layer.name)
        for j, w in enumerate(layer.weights):
            print(w.shape)
            np.save(f"{out_dir}/layer_{i}_{layer.name}_weights_{j}.npy", w.numpy())


_dump_layer_weights(encoder, "encoder_layer_weights")
_dump_layer_weights(decoder, "decoder_layer_weights")
然后您重新創建子類模型,但這次您為每一層中的每個權重使用初始化器。 這必須小心完成,因為如果形狀不匹配,您的 model 將無法編譯。
class CNN_Encoder(tf.keras.Model):
    """Encoder that passes pre-extracted image features through one dense layer.

    The image features are expected to have been extracted (and dumped with
    pickle) beforehand, so this model only applies a fully connected layer
    followed by ReLU. The dense layer's kernel and bias are read from ``.npy``
    files under ``encoder_layer_weights/`` and supplied through constant
    initializers.
    """

    def __init__(self, embedding_dim):
        super().__init__()
        path_template = "encoder_layer_weights/layer_%s_%s_weights_%s.npy"
        # Saved weight 0 of layer 0 ("dense") becomes the kernel, weight 1 the bias.
        kernel = np.load(path_template % (0, "dense", 0))
        bias = np.load(path_template % (0, "dense", 1))
        constant = tf.keras.initializers.Constant
        # Per the original tutorial, the output of fc is
        # (batch_size, 64, embedding_dim).
        self.fc = tf.keras.layers.Dense(
            embedding_dim,
            kernel_initializer=constant(kernel),
            bias_initializer=constant(bias),
        )

    def call(self, x):
        """Return ``relu(fc(x))``."""
        projected = self.fc(x)
        return tf.nn.relu(projected)
class BahdanauAttention(tf.keras.Model):
    """Additive attention over encoder features, with weights loaded from disk.

    The three dense layers (``W1``, ``W2``, ``V``) take their kernels and
    biases from six ``.npy`` files saved for decoder layer 4
    ("bahdanau_attention"), consumed in the order
    W1 kernel, W1 bias, W2 kernel, W2 bias, V kernel, V bias.
    """

    def __init__(self, units):
        super().__init__()
        path_template = "decoder_layer_weights/layer_%s_%s_weights_%s.npy"
        saved = [
            np.load(path_template % (4, "bahdanau_attention", idx))
            for idx in range(6)
        ]
        constant = tf.keras.initializers.Constant
        self.W1 = tf.keras.layers.Dense(
            units,
            kernel_initializer=constant(saved[0]),
            bias_initializer=constant(saved[1]),
        )
        self.W2 = tf.keras.layers.Dense(
            units,
            kernel_initializer=constant(saved[2]),
            bias_initializer=constant(saved[3]),
        )
        self.V = tf.keras.layers.Dense(
            1,
            kernel_initializer=constant(saved[4]),
            bias_initializer=constant(saved[5]),
        )

    def call(self, features, hidden):
        """Return ``(context_vector, attention_weights)`` for one decoding step.

        Shape notes assume, as in the original tutorial, that ``features`` is
        (batch_size, 64, embedding_dim) and ``hidden`` is
        (batch_size, hidden_size) -- TODO confirm against the caller.
        """
        # Insert a length-1 axis at position 1 so hidden broadcasts against
        # the per-location features: (batch_size, 1, hidden_size).
        hidden_with_time_axis = tf.expand_dims(hidden, 1)

        # Additive score; last axis has size `units`.
        combined = self.W1(features) + self.W2(hidden_with_time_axis)
        score = tf.nn.tanh(combined)

        # V maps each location to a single logit; softmax over axis 1
        # normalizes across locations -> (batch_size, 64, 1).
        attention_weights = tf.nn.softmax(self.V(score), axis=1)

        # Weighted sum of features over axis 1. The last axis therefore
        # matches that of `features` (presumably embedding_dim).
        context_vector = tf.reduce_sum(attention_weights * features, axis=1)

        return context_vector, attention_weights
class RNN_Decoder(tf.keras.Model):
    """GRU decoder with attention whose weights are restored from ``.npy`` files.

    Every layer is constructed with constant initializers fed from files under
    ``decoder_layer_weights/``: layer 0 ("embedding"), layer 1 ("gru"),
    layer 2 ("dense_1") and layer 3 ("dense_2"). The attention sub-model
    loads its own files in its constructor.
    """

    def __init__(self, embedding_dim, units, vocab_size):
        super().__init__()
        self.units = units

        path_template = "decoder_layer_weights/layer_%s_%s_weights_%s.npy"
        constant = tf.keras.initializers.Constant

        def load(layer_index, layer_name, weight_index):
            # Read one saved weight array for the given layer / weight slot.
            return np.load(path_template % (layer_index, layer_name, weight_index))

        embedding_matrix = load(0, "embedding", 0)
        # GRU files 0, 1, 2 are used as kernel, recurrent kernel and bias.
        gru_kernel, gru_recurrent, gru_bias = (load(1, "gru", k) for k in range(3))
        fc1_kernel, fc1_bias = (load(2, "dense_1", k) for k in range(2))
        fc2_kernel, fc2_bias = (load(3, "dense_2", k) for k in range(2))

        self.embedding = tf.keras.layers.Embedding(
            vocab_size,
            embedding_dim,
            embeddings_initializer=constant(embedding_matrix),
        )
        self.gru = tf.keras.layers.GRU(
            self.units,
            return_sequences=True,
            return_state=True,
            kernel_initializer=constant(gru_kernel),
            recurrent_initializer=constant(gru_recurrent),
            bias_initializer=constant(gru_bias),
        )
        self.fc1 = tf.keras.layers.Dense(
            self.units,
            kernel_initializer=constant(fc1_kernel),
            bias_initializer=constant(fc1_bias),
        )
        self.fc2 = tf.keras.layers.Dense(
            vocab_size,
            kernel_initializer=constant(fc2_kernel),
            bias_initializer=constant(fc2_bias),
        )
        self.attention = BahdanauAttention(self.units)

    def call(self, x, features, hidden):
        """Run one decoding step.

        Returns a tuple ``(logits, state, attention_weights)`` where ``logits``
        is the output of ``fc2``, ``state`` is the GRU's returned state and
        ``attention_weights`` comes from the attention sub-model.
        """
        # Attention is a separate model; it yields the context vector for
        # this step plus the weights it used.
        context_vector, attention_weights = self.attention(features, hidden)

        # Token ids -> embeddings; per the original tutorial this is
        # (batch_size, 1, embedding_dim) -- TODO confirm input is one token per row.
        embedded = self.embedding(x)

        # Prepend the context vector (given a length-1 axis) along the last
        # axis of the embedded input.
        gru_input = tf.concat([tf.expand_dims(context_vector, 1), embedded], axis=-1)

        # The GRU returns the full output sequence and its final state.
        output, state = self.gru(gru_input)

        # Dense layer keeps the leading two axes; last axis becomes `units`.
        projected = self.fc1(output)

        # Collapse the leading two axes into one, keeping the last axis size.
        flattened = tf.reshape(projected, (-1, projected.shape[2]))

        # Final projection to vocabulary size.
        logits = self.fc2(flattened)

        return logits, state, attention_weights

    def reset_state(self, batch_size):
        """Return an all-zero hidden state of shape ``(batch_size, units)``."""
        zero_state_shape = (batch_size, self.units)
        return tf.zeros(zero_state_shape)
最后,像往常一樣實例化Encoder
和Decoder
類:
# Instantiate both models; their constructors read the saved .npy weight
# files, so the files must already exist on disk at this point.
encoder = CNN_Encoder(embedding_dim=embedding_dim)
decoder = RNN_Decoder(
    embedding_dim=embedding_dim,
    units=units,
    vocab_size=vocab_size,
)
聲明:本站的技術帖子網頁,遵循CC BY-SA 4.0協議,如果您需要轉載,請注明本站網址或者原文地址。任何問題請咨詢:yoyou2525@163.com.