[英]How to engineer/preprocess features for the input layer of an exported TensorFlow 2.0 Keras model for TensorFlow Serving
I have created a model using TensorFlow-2.0-beta1.我使用 TensorFlow-2.0-beta1 创建了一个模型。 This uses the Keras functional API to perform regression on the input data.这使用 Keras 函数式 API 对输入数据执行回归。 The data needs to have the categorical features one-hot encoded and the numeric inputs normalized.数据需要对分类特征进行单热编码,并对数字输入进行归一化。 In the past using the Estimators API in TF1.11 this could be fixed using the feature columns and applying engineering to the feature in the ServingInputReceiver.在过去使用 TF1.11 中的 Estimators API 时,这可以通过使用特征列并将工程应用到 ServingInputReceiver 中的特征来解决。 Is there a way to do something similar when exporting a model from keras?从 keras 导出模型时,有没有办法做类似的事情?
import datetime
import os
import pickle

import pandas as pd
import tensorflow as tf
import tensorflow_datasets as tfds
tf.keras.backend.clear_session()  # For easy reset of notebook state.

VERSION = tf.__version__

# The data directory lives next to the current working directory, i.e. under
# the parent of CWD.
CWD = os.getcwd()
PARENT_DIR = os.path.split(CWD)[0]
DATETIME = datetime.datetime.utcnow()
DATA_DIR = os.path.join(PARENT_DIR, 'data')
train_file_path = os.path.join(DATA_DIR, 'traindf.csv')
test_file_path = os.path.join(DATA_DIR, 'testdf.csv')

# NOTE(security): pickle.load can execute arbitrary code contained in the
# file. Only load these artifacts if they come from a trusted source.
#
# CATEGORIES is indexed by feature name and yields the category labels used
# for one-hot encoding. The `with` block guarantees the file handle is closed
# even if unpickling raises.
with open(os.path.join(DATA_DIR, "CATEGORIES"), 'rb') as fileObject:
    CATEGORIES = pickle.load(fileObject)

# NUMERICSTATS is indexed by feature name and yields a mapping that holds the
# 'mean' and 'std' used for normalization.
with open(os.path.join(DATA_DIR, "NUMERICSTATS"), 'rb') as fileObject:
    NUMERICSTATS = pickle.load(fileObject)
# CSV columns in the input file: taken from the header (first) line of the
# training CSV.
with open(train_file_path, 'r') as f:
    names_row = f.readline()
CSV_COLUMNS = names_row.rstrip('\n').split(',')
print(CSV_COLUMNS)

# Columns that are neither features nor the label.
drop_columns = [
    'SubSilo',
    'Year',
    'StockID',
    'QuickRef',
    'sumUKQuantity',
    'sumNonUKQuantity',
]
columns_to_use = [name for name in CSV_COLUMNS if name not in drop_columns]
columns_to_use  # Bare expression: displays the value when run as a notebook cell.

# The regression target, and every remaining column as a model feature.
LABEL_COLUMN = 'totalqty'
FEATURE_COLUMNS = [name for name in columns_to_use if name != LABEL_COLUMN]

# NOTE(review): `testdf` is not defined anywhere in the visible code.
# Presumably it is a pandas DataFrame loaded from `test_file_path` - confirm.
test_labels = testdf[LABEL_COLUMN]
# Dtype of every selected CSV column, in column order. The column names are
# kept here for documentation only; just the dtypes end up in COLUMN_DEFAULTS.
_COLUMN_DTYPES = (
    ('ProductBrand', tf.dtypes.string),
    ('Department', tf.dtypes.string),
    ('ProductType', tf.dtypes.string),
    ('ProductSubType', tf.dtypes.string),
    ('Silo', tf.dtypes.string),
    ('Level', tf.dtypes.string),
    ('BaseColour', tf.dtypes.string),
    ('Sport', tf.dtypes.string),
    ('UKSize', tf.dtypes.string),
    ('UnitCostPrice', tf.dtypes.float32),
    ('ExVatSalesValue', tf.dtypes.float32),
    ('RRP_GBP', tf.dtypes.float32),
    ('Week', tf.dtypes.string),
    ('totalqty', tf.dtypes.int32),
)
COLUMN_DEFAULTS = [dtype for _, dtype in _COLUMN_DTYPES]
def get_dataset(file_path, batch_size=60, shuffle=False):
    """Build a batched dataset of (features, label) pairs from a CSV file.

    Only the columns in ``columns_to_use`` are read, typed according to
    ``COLUMN_DEFAULTS``; ``LABEL_COLUMN`` is split off as the label.

    Args:
        file_path: Path of the CSV file to read.
        batch_size: Number of records per batch. Defaults to 60, which is
            artificially small to make examples easier to show.
        shuffle: Whether the records are shuffled while reading. Defaults to
            False so the order of the file is preserved.

    Returns:
        The dataset created by ``tf.data.experimental.make_csv_dataset``,
        covering the file exactly once (``num_epochs=1``).
    """
    return tf.data.experimental.make_csv_dataset(
        file_path,
        batch_size=batch_size,
        label_name=LABEL_COLUMN,
        select_columns=columns_to_use,
        column_defaults=COLUMN_DEFAULTS,
        num_epochs=1,
        # Records that fail to parse are skipped instead of raising.
        ignore_errors=True,
        shuffle=shuffle)
# Raw (not yet preprocessed) datasets for the training and the test CSV.
raw_train_data, raw_test_data = (
    get_dataset(path) for path in (train_file_path, test_file_path)
)
def process_categorical_data(data, categories):
    """Returns a one-hot encoded tensor representing categorical values.

    Args:
        data: String tensor holding one categorical value per element.
        categories: The category labels to encode against.

    Returns:
        A float32 tensor in which each input element becomes a row of
        0.0/1.0 values, one per category, with 1.0 where the element equals
        that category label.
    """
    # Clean-up: drop one leading ' ' and then one trailing '.'.
    cleaned = tf.strings.regex_replace(
        tf.strings.regex_replace(data, '^ ', ''), r'\.$', '')
    # Turn the values into a column (a list of one-element lists) so the
    # comparison against `categories` yields one row of booleans per element.
    as_column = tf.reshape(cleaned, [-1, 1])
    is_category = tf.equal(categories, as_column)
    # Booleans -> floats gives the one-hot encoding.
    return tf.cast(is_category, tf.float32)
def process_continuous_data(data, mean, std):
    """Normalize a numeric feature to ``(data - mean) / std``.

    Args:
        data: Numeric tensor holding one value per element.
        mean: Mean of the feature, subtracted from every value.
        std: Standard deviation of the feature, used as the divisor.

    Returns:
        A float32 tensor reshaped to ``[-1, 1]``. Where ``std`` is zero (a
        constant feature) the result is 0.0 rather than NaN/inf, so a
        degenerate column cannot poison training with non-finite values.
    """
    centered = tf.cast(data, tf.float32) - mean
    # divide_no_nan returns 0 wherever the divisor is 0; both operands must
    # share a dtype, hence the explicit cast of `std`.
    normalized = tf.math.divide_no_nan(centered, tf.cast(std, tf.float32))
    return tf.reshape(normalized, [-1, 1])
def preprocess(features, labels):
    """Encode a batch of raw features into a single model-input tensor.

    Categorical features (the keys of ``CATEGORIES``) are one-hot encoded,
    continuous features (the keys of ``NUMERICSTATS``) are normalized with
    their stored mean and std, and the results are concatenated along axis 1
    in ``FEATURE_COLUMNS`` order.

    Args:
        features: Mapping from feature name to a batch tensor. It is not
            modified.
        labels: Labels of the batch; passed through unchanged.

    Returns:
        A ``(features, labels)`` tuple where ``features`` is the concatenated
        tensor.
    """
    # Work on a shallow copy so the caller's mapping is left untouched.
    processed = dict(features)

    # Process categorical features.
    for name, categories in CATEGORIES.items():
        processed[name] = process_categorical_data(processed[name], categories)

    # Process continuous features.
    for name, stats in NUMERICSTATS.items():
        processed[name] = process_continuous_data(
            processed[name], stats['mean'], stats['std'])

    # Assemble features into a single tensor.
    assembled = tf.concat([processed[column] for column in FEATURE_COLUMNS], 1)
    return assembled, labels
# Apply the preprocessing to both datasets; only the training data is shuffled.
# NOTE(review): `traindf` is not defined anywhere in the visible code.
# Presumably it is a pandas DataFrame loaded from `train_file_path` - confirm.
train_data = raw_train_data.map(preprocess)
train_data = train_data.shuffle(len(traindf))
test_data = raw_test_data.map(preprocess)
def get_model(input_dim, hidden_units=(244, 200, 100, 50), dropout_rate=0.5,
              l2_factor=0.001):
    """Create a Keras regression model made of Dense + Dropout blocks.

    Args:
        input_dim: (int) The shape of an item in a batch.
        hidden_units: Width of each hidden Dense layer, in order. One
            ReLU Dense layer followed by one Dropout layer is created per
            entry.
        dropout_rate: Rate of the Dropout layer that follows every hidden
            layer.
        l2_factor: L2 regularization factor applied to the kernel of every
            hidden layer.

    Returns:
        A Keras model with a single linear output unit.
    """
    inputs = tf.keras.Input(shape=(input_dim,))
    x = inputs
    for units in hidden_units:
        x = tf.keras.layers.Dense(
            units,
            activation='relu',
            kernel_regularizer=tf.keras.regularizers.l2(l2_factor))(x)
        x = tf.keras.layers.Dropout(dropout_rate)(x)
    # Single unit without activation: the regression output.
    outputs = tf.keras.layers.Dense(1)(x)
    return tf.keras.Model(inputs, outputs)
# The dataset yields (features, labels) pairs, so its shapes unpack the same way.
# NOTE(review): `Dataset.output_shapes` is a TF1-style property; confirm it is
# still available on the installed TensorFlow version.
input_shape, output_shape = train_data.output_shapes
input_dimension = input_shape.dims[1]  # [0] is the batch size

model = get_model(input_dimension)
optimizer = tf.keras.optimizers.Adam(0.001)
model.compile(
    loss='mse',
    optimizer=optimizer,
    metrics=['mae', 'mse', tf.keras.metrics.RootMeanSquaredError()],
)

# The patience parameter is the amount of epochs to check for improvement
early_stop = tf.keras.callbacks.EarlyStopping(
    monitor='val_loss',
    patience=20,
)
# Display training progress by printing a single dot for each completed epoch
class PrintDot(tf.keras.callbacks.Callback):
    """Keras callback that prints one '.' per finished epoch."""

    def on_epoch_end(self, epoch, logs=None):
        """Print a dot for this epoch, starting a new line every 100 epochs.

        Args:
            epoch: Index of the epoch that just finished.
            logs: Metric results for the epoch; unused. Defaults to None to
                match the Keras ``Callback.on_epoch_end`` signature.
        """
        if epoch % 100 == 0:
            print('')
        print('.', end='')
# Write TensorBoard logs to <PARENT_DIR>/tensorBoardLogs.
tensor_board = tf.keras.callbacks.TensorBoard(
    log_dir=os.path.join(PARENT_DIR, 'tensorBoardLogs'),
)

# Learning-rate schedule driven by the validation loss.
reduce_lr = tf.keras.callbacks.ReduceLROnPlateau(
    monitor='val_loss',
    factor=0.2,
    patience=4,
    verbose=1,
    min_lr=0.00001,
)
# Train for at most 100 epochs, validating on the test data after each one.
history = model.fit(
    train_data,
    validation_data=test_data,
    epochs=100,
    verbose=1,
    callbacks=[early_stop, PrintDot(), tensor_board, reduce_lr],
)

# Export the trained model as a SavedModel under <PARENT_DIR>/models/1.
# NOTE(review): `tf.keras.experimental.export_saved_model` is an experimental
# API; confirm it is still available on the installed TensorFlow version.
tf.keras.experimental.export_saved_model(
    model,
    saved_model_path=os.path.join(PARENT_DIR, 'models/1'),
)
What I would like is a model that I can serve using TensorFlow Serving, which takes the features as they are in my training data (13 of them) and preprocesses them in the model itself.我想要的是一个可以用 TensorFlow Serving 部署的模型,它直接接收训练数据中的原始特征(共 13 个),并在模型内部对它们进行预处理。 That way, a middleman such as Flask won't be needed.这样就不需要用 Flask 之类的东西作为中间层了。
You can consider using TensorFlow Transform, which applies during serving the same transformations that you applied during training.您可以考虑使用 TensorFlow Transform,它会在服务(Serving)阶段应用与训练(Training)阶段相同的转换。
You can replace your functions `process_categorical_data`, `process_continuous_data` and `preprocess` with the code below:您可以使用以下代码替换您的函数 `process_categorical_data`、`process_continuous_data` 和 `preprocess`:
def preprocessing_fn(inputs):
    """Preprocess input columns into transformed columns.

    Args:
        inputs: Mapping that provides the raw columns 'x', 'y' and 's'.

    Returns:
        A dict with the centered 'x', the 'y' scaled to [0, 1], the 's'
        mapped to vocabulary indices, and the product of the first two.
    """
    x, y, s = (inputs[key] for key in ('x', 'y', 's'))
    outputs = {
        'x_centered': x - tft.mean(x),
        'y_normalized': tft.scale_to_0_1(y),
        's_integerized': tft.compute_and_apply_vocabulary(s),
    }
    # Derived feature built from two already-transformed columns.
    outputs['x_centered_times_y_normalized'] = (
        outputs['x_centered'] * outputs['y_normalized'])
    return outputs
# Ignore the warnings
with tft_beam.Context(temp_dir=tempfile.mkdtemp()):
    # Analyze the raw data and apply `preprocessing_fn` to it in one step.
    analyze_and_transform = tft_beam.AnalyzeAndTransformDataset(preprocessing_fn)
    transformed_dataset, transform_fn = (  # pylint: disable=unused-variable
        (raw_data, raw_data_metadata) | analyze_and_transform)

# The transformed dataset is a (data, metadata) pair.
transformed_data, transformed_metadata = transformed_dataset

print('\nRaw data:\n{}\n'.format(pprint.pformat(raw_data)))
print('Transformed data:\n{}'.format(pprint.pformat(transformed_data)))
For more details, please refer to the TF Transform Guide, Tutorial 1 and Tutorial 2.更多详细信息,请参阅 TF Transform 指南、教程 1 和教程 2。
Were you able to solve this? 您能解决这个问题吗? I have a similar requirement and am thinking about how to achieve it. 我有一个类似的要求,并在思考如何实现。
One option is to convert the Keras model to a TF Estimator again and then export it as an Estimator pipeline, but that defeats the purpose of having Keras in TF 2.0 as a production pipeline model. 一种选择是将 Keras 模型再次转换为 TF Estimator,然后导出为 Estimator 管道,但这违背了将 TF 2.0 中的 Keras 作为生产管道模型的目的。
声明:本站的技术帖子网页,遵循CC BY-SA 4.0协议,如果您需要转载,请注明本站网址或者原文地址。任何问题请咨询:yoyou2525@163.com.