如何將 Scikit-Learn-Keras 模型保存到持久性文件中 (pickle/hd5/json/yaml)

Question

我有以下代碼，使用Keras Scikit-Learn Wrapper ：

from keras.models import Sequential
from sklearn import datasets
from keras.layers import Dense
from sklearn.model_selection import train_test_split
from keras.wrappers.scikit_learn import KerasClassifier
from sklearn.model_selection import StratifiedKFold
from sklearn.model_selection import cross_val_score
from sklearn import preprocessing
import pickle
import numpy as np
import json

def classifier(X, y):
    """
    Build an untrained scikit-learn style wrapper around a small Keras net.

    The width of the first layer's input is read from the second dimension
    of ``X`` (``X`` must therefore unpack into exactly two dimensions).
    ``y`` is accepted but not used.  The returned ``KerasClassifier`` holds
    a reference to the nested ``create_model`` factory defined below.
    """
    _n_rows, n_features = X.shape

    def create_model():
        # Dense stack 12 -> 6 -> 1 ending in a single sigmoid unit
        layers = [
            Dense(12, input_dim=n_features, init='uniform', activation='relu'),
            Dense(6, init='uniform', activation='relu'),
            Dense(1, init='uniform', activation='sigmoid'),
        ]
        net = Sequential()
        for layer in layers:
            net.add(layer)
        # Compile with binary cross-entropy and track accuracy
        net.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
        return net

    # Seed NumPy's global RNG before the wrapper is created
    np.random.seed(7)
    return KerasClassifier(build_fn=create_model, nb_epoch=150, batch_size=10, verbose=0)


def main():
    """
    Fit the wrapped classifier on the scaled iris data, then try to
    round-trip it through pickle and score the reloaded copy.

    The pickle round-trip is the step the question is about: it raises
    ``pickle.PicklingError`` on the ``pickle.dump`` call.
    """

    iris = datasets.load_iris()
    X, y = iris.data, iris.target
    X = preprocessing.scale(X)

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4, random_state=0)
    model_tt = classifier(X_train, y_train)
    model_tt.fit(X_train,y_train)

    #--------------------------------------------------
    # This fails (pickle.PicklingError raised by pickle.dump)
    #--------------------------------------------------
    filename = 'finalized_model.sav'
    # Context managers make sure the file is closed even when dump/load raises
    with open(filename, 'wb') as model_file:
        pickle.dump(model_tt, model_file)
    # load the model from disk
    with open(filename, 'rb') as model_file:
        loaded_model = pickle.load(model_file)
    # y_test (lower-case) is the name bound by train_test_split above
    result = loaded_model.score(X_test, y_test)
    print(result)

    #--------------------------------------------------
    # This also fails
    #--------------------------------------------------
    # from keras.models import load_model
    # model_tt.save('test_model.h5')


    #--------------------------------------------------
    # This works OK
    #--------------------------------------------------
    # print(model_tt.score(X_test, y_test))
    # print(model_tt.predict_proba(X_test))
    # print(model_tt.predict(X_test))


# Output of predict_proba
# 2nd column is the probability that the prediction is 1
# this value is used as final score, which can be used
# with other method as comparison
# [   [ 0.25311464  0.74688536]
#     [ 0.84401423  0.15598579]
#     [ 0.96047372  0.03952631]
#     ...,
#     [ 0.25518912  0.74481088]
#     [ 0.91467732  0.08532269]
#     [ 0.25473493  0.74526507]]

# Output of predict
# [[1]
# [0]
# [0]
# ...,
# [1]
# [0]
# [1]]


# Run the demo only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()

如代碼中所述，它在這一行失敗：

pickle.dump(model_tt, open(filename, 'wb'))

出現此錯誤：

pickle.PicklingError: Can't pickle <function create_model at 0x101c09320>: it's not found as __main__.create_model

我怎樣才能繞過它？

Answer 1

編輯 1 ：關於保存模型的原始答案

使用 HDF5：

# saving model: the architecture goes to a JSON text file
json_model = model_tt.model.to_json()
# `with` closes the file (and flushes the JSON to disk) deterministically
with open('model_architecture.json', 'w') as json_file:
    json_file.write(json_model)
# saving weights
model_tt.model.save_weights('model_weights.h5', overwrite=True)


# loading model
from keras.models import model_from_json

with open('model_architecture.json') as json_file:
    model = model_from_json(json_file.read())
model.load_weights('model_weights.h5')

# don't forget to compile your model
model.compile(loss='binary_crossentropy', optimizer='adam')

編輯 2 ：帶有 iris 數據集的完整代碼示例

# Train model and make predictions
import numpy
import pandas
from keras.models import Sequential, model_from_json
from keras.layers import Dense
from keras.utils import np_utils
from sklearn import datasets
from sklearn import preprocessing
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder

# Seed NumPy's global RNG so repeated runs draw the same random numbers
seed = 7
numpy.random.seed(seed)

# Iris data: scaled features, integer targets and the class names
iris = datasets.load_iris()
X = preprocessing.scale(iris.data)
Y = iris.target
labels = iris.target_names

# Encode class values as integers (fit and transform in a single call)
encoder = LabelEncoder()
encoded_Y = encoder.fit_transform(Y)

# Turn the integer codes into dummy variables (i.e. one hot encoded)
y = np_utils.to_categorical(encoded_Y)

def build_model():
    """Return a compiled two-layer network: 4 inputs -> 4 relu units -> 3 sigmoid units."""
    # Hand the layer stack to the constructor instead of calling add() twice
    net = Sequential([
        Dense(4, input_dim=4, init='normal', activation='relu'),
        Dense(3, init='normal', activation='sigmoid'),
    ])
    # Categorical cross-entropy loss, adam optimizer, accuracy as metric
    net.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
    return net

def save_model(model):
    """Persist ``model`` as two files in the current working directory.

    The architecture returned by ``model.to_json()`` is written to
    ``model_architecture.json`` and the weights are written through
    ``model.save_weights`` to ``model_weights.h5`` (overwriting any
    existing file).  Returns ``None``.
    """
    # saving model
    json_model = model.to_json()
    # `with` closes the handle deterministically, so the JSON is fully
    # flushed to disk before the weights are written.
    with open('model_architecture.json', 'w') as json_file:
        json_file.write(json_model)
    # saving weights
    model.save_weights('model_weights.h5', overwrite=True)

def load_model():
    """Rebuild and compile the model from the two files in the working directory.

    Reads the architecture from ``model_architecture.json``, loads the
    weights from ``model_weights.h5`` and compiles the result with
    categorical cross-entropy and the adam optimizer.  Returns the
    compiled model.
    """
    # loading model; `with` makes sure the JSON file handle is closed
    with open('model_architecture.json') as json_file:
        model = model_from_json(json_file.read())
    model.load_weights('model_weights.h5')
    # Compile after loading the architecture and weights
    model.compile(loss='categorical_crossentropy', optimizer='adam')
    return model


# Hold out 30% of the samples for testing; the split is seeded with `seed`
X_train, X_test, Y_train, Y_test = train_test_split(X, y, test_size=0.3, random_state=seed)

# build and train on the training split
model = build_model()
model.fit(X_train, Y_train, nb_epoch=200, batch_size=5, verbose=0)

# save architecture + weights to disk
save_model(model)

# load: rebinds `model` to the copy rebuilt from the saved files
model = load_model()

# predictions: class indices for the test split
predictions = model.predict_classes(X_test, verbose=0)
print(predictions)
# reverse encoding: map each predicted index back to its class name
for pred in predictions:
    print(labels[pred])

請注意，我只使用了 Keras，而不是包裝器。 對於這樣簡單的事情，包裝器只會增加一些復雜性。 此外，代碼是刻意不做重構（未拆分成多個模塊）的，因此您可以了解全貌。

另外，你說你想輸出 1 或 0。在這個數據集中是不可能的，因為你有 3 個輸出維度和類（Iris-setosa、Iris-versicolor、Iris-virginica）。 如果您只有 2 個類，那么使用 sigmoid 輸出函數，您的輸出維度（output dim）和類將為 0 或 1。

Answer 2

只是添加到 gaarv 的答案 - 如果您不需要模型結構（ model.to_json() ）和權重（ model.save_weights() ）之間的分離，您可以使用以下方法之一：

使用內置的 `keras.models.save_model` 和 `keras.models.load_model` 將所有內容一起存儲在 hdf5 文件中。
使用 pickle 將 Model 對象（或任何包含對它的引用的類）序列化為文件/網絡/任何..
不幸的是，Keras 默認不支持 pickle。 您可以使用我的補丁式解決方案，它添加了這個缺失的功能。 工作代碼在這里：http://zachmoshe.com/2017/04/03/pickling-keras-models.html

Answer 3

另一個很好的選擇是在fit模型時使用回調。 特別是ModelCheckpoint回調，如下所示：

from keras.callbacks import ModelCheckpoint
# Create an instance of ModelCheckpoint that writes to "myModel.h5"
chk = ModelCheckpoint("myModel.h5", monitor='val_loss', save_best_only=False)
# Add that callback to the list of callbacks to pass
callbacks_list = [chk]
# Create your model
model_tt = KerasClassifier(build_fn=create_model, nb_epoch=150, batch_size=10)
# Fit your model with your data. Pass the callback(s) here
model_tt.fit(X_train,y_train, callbacks=callbacks_list)

這會將每個時期的訓練保存到myModel.h5文件中。 這提供了很大的好處，因為您可以在需要時停止訓練（例如當您看到它開始過度擬合時），並且仍然保留之前的訓練。

請注意，這會將結構和權重保存在同一個hdf5文件中（如 Zach 所示），因此您可以使用keras.models.load_model加載模型。

如果您只想單獨保存權重，則可以在實例化ModelCheckpoint時使用save_weights_only=True參數，使您能夠按照 Gaarv 的解釋加載模型。 從文檔中提取：

save_weights_only：如果為True，則僅保存模型的權重（model.save_weights(filepath)），否則保存完整模型（model.save(filepath)）。

Answer 4

接受的答案太復雜了。 您可以在.h5文件中完全保存和恢復模型的各個方面。 直接來自Keras 常見問題解答：

您可以使用 model.save(filepath) 將 Keras 模型保存到單個 HDF5 文件中，該文件將包含：

模型的架構，允許重新創建模型

模型的權重

訓練配置（損失，優化器）

優化器的狀態，允許從您停止的地方恢復訓練。

然后您可以使用keras.models.load_model(filepath)重新實例化您的模型。 load_model還將負責使用保存的訓練配置編譯模型（除非模型從未編譯過）。

以及相應的代碼：

from keras.models import load_model

model.save('my_model.h5')  # creates an HDF5 file 'my_model.h5'
del model  # drops the in-memory model so the reload below is a fresh object

# returns a compiled model identical to the previous one
model = load_model('my_model.h5')

Answer 5

如果您的 keras 包裝器模型在 scikit 管道中，您可以單獨保存管道中的步驟。

import joblib
from sklearn.pipeline import Pipeline
from tensorflow import keras

# pass the create_cnn_model function into wrapper
cnn_model = keras.wrappers.scikit_learn.KerasClassifier(build_fn=create_cnn_model)

# create pipeline: existing preprocessing pipeline followed by the wrapped model
cnn_model_pipeline_estimator = Pipeline([
    ('preprocessing_pipeline', pipeline_estimator),
    ('clf', cnn_model)
])

# train model; the clf__ prefix routes the fit parameters to the 'clf' step
final_model = cnn_model_pipeline_estimator.fit(
    X, y, clf__batch_size=32, clf__epochs=15)

# collect the preprocessing pipeline & model separately
pipeline_estimator = final_model.named_steps['preprocessing_pipeline']
clf = final_model.named_steps['clf']

# store pipeline and model separately
# (`with` closes the file handle once joblib has finished writing)
with open('path/to/pipeline.pkl', 'wb') as pipeline_file:
    joblib.dump(pipeline_estimator, pipeline_file)
clf.model.save('path/to/model.h5')

# load pipeline and model (same file names as written above)
pipeline_estimator = joblib.load('path/to/pipeline.pkl')
model = keras.models.load_model('path/to/model.h5')

# placeholder: replace with real feature rows before running
new_example = [[...]]

# transform new data with pipeline & use model for prediction
transformed_data = pipeline_estimator.transform(new_example)
prediction = model.predict(transformed_data)

如何將 Scikit-Learn-Keras 模型保存到持久性文件中 (pickle/hd5/json/yaml)

問題描述

5 個解決方案

解決方案1
14 已采納 2016-11-03 08:58:38

解決方案2
9 2017-04-04 06:55:31

解決方案3
7 2017-10-02 21:36:27

解決方案4
6 2018-10-15 20:23:50

解決方案5
1 2020-06-07 18:25:05

如何將 Scikit-Learn-Keras 模型保存到持久性文件中 (pickle/hd5/json/yaml)

問題描述

5 個解決方案

解決方案1 14 已采納 2016-11-03 08:58:38

解決方案2 9 2017-04-04 06:55:31

解決方案3 7 2017-10-02 21:36:27

解決方案4 6 2018-10-15 20:23:50

解決方案5 1 2020-06-07 18:25:05

解決方案1
14 已采納 2016-11-03 08:58:38

解決方案2
9 2017-04-04 06:55:31

解決方案3
7 2017-10-02 21:36:27

解決方案4
6 2018-10-15 20:23:50

解決方案5
1 2020-06-07 18:25:05