![](/img/trans.png)
[英]Scikit Learn model results in error when calling local predict or creating model version on Google Cloud ml engine
[英]DataException error appears when using ML model (.pkl) to predict
我使用以下 Excel 文件在 Azure 中训练了模型。因此,如您所见,训练时需要输入 17 个特征。训练结束后,我将模型(.pkl)下载到本地计算机并尝试进行预测。
我使用以下代码加载 model:
import pickle
import json
Pkl_Filename = 'model.pkl'
with open(Pkl_Filename, 'rb') as file:
Classifier_Model = pickle.load(file)
data = {"data": [
{
"Width": 25.99737167,
"Length": 171.4788666,
"Ratio": 6.596007809,
"Perimter": 394.9524765,
"Area": 4458,
"Angle": 14.5224762,
"R": int(127),
"G": int(126),
"B": int(129),
"H": int(130),
"S": int(6),
"V": int(129),
"H1": int(130),
"L1": int(128),
"S1": int(3),
"# Of Points": 36,
"Epsilon": 2
}
],
"method": "predict"
}
input_data = json.dumps(data)
>>> '{"data": [{"Width": 25.99737167, "Length": 171.4788666, "Ratio": 6.596007809, "Perimter": 394.9524765, "Area": 4458, "Angle": 14.5224762, "R": 127, "G": 126, "B": 129, "H": 130, "S": 6, "V": 129, "H1": 130, "L1": 128, "S1": 3, "# Of Points": 36, "Epsilon": 2}], "method": "predict"}'
predict_data = np.array(json.loads(input_data)['data'])
>>> array([{'Width': 25.99737167, 'Length': 171.4788666, 'Ratio': 6.596007809, 'Perimter': 394.9524765, 'Area': 4458, 'Angle': 14.5224762, 'R': 127, 'G': 126, 'B': 129, 'H': 130, 'S': 6, 'V': 129, 'H1': 130, 'L1': 128, 'S1': 3, '# Of Points': 36, 'Epsilon': 2}],
dtype=object)
# Call predict on the object loaded from model.pkl, which is bound to the
# name Classifier_Model; no other model name is defined in this snippet.
Classifier_Model.predict(predict_data)
运行这段代码,出现DataException错误,不知道怎么解决。
下面是错误代码:
DataException: DataException:
Message: The number of features in [fitted data](17) does not match with those in [input data](1). Please inspect your data, and make sure that features are aligned in both the Datasets.
InnerException: None
ErrorResponse
{
"error": {
"code": "UserError",
"message": "The number of features in [fitted data](17) does not match with those in [input data](1). Please inspect your data, and make sure that features are aligned in both the Datasets.",
"target": "X",
"inner_error": {
"code": "BadData",
"inner_error": {
"code": "InvalidDimension",
"inner_error": {
"code": "DataShapeMismatch"
}
}
},
"reference_code": "c402b6c2-3870-45a7-8745-c063bd385962"
}
}
我不知道评分文件(scoring file)是否有帮助,其内容如下:
# ---------------------------------------------------------
# Copyright (c) Microsoft Corporation. All rights reserved.
# ---------------------------------------------------------
import json
import logging
import os
import pickle
import numpy as np
import pandas as pd
import joblib
import azureml.automl.core
from azureml.automl.core.shared import logging_utilities, log_server
from azureml.telemetry import INSTRUMENTATION_KEY
from inference_schema.schema_decorators import input_schema, output_schema
from inference_schema.parameter_types.numpy_parameter_type import NumpyParameterType
from inference_schema.parameter_types.pandas_parameter_type import PandasParameterType
from inference_schema.parameter_types.standard_py_parameter_type import StandardPythonParameterType
# One-row sample frame describing the expected input columns and dtypes:
# six float64 measurements, ten int64 values, and one object column.
input_sample = pd.DataFrame({
    **{
        column: pd.Series([0.0], dtype="float64")
        for column in ("Width", "Length", "Ratio", "Perimter", "Area", "Angle")
    },
    **{
        column: pd.Series([0], dtype="int64")
        for column in (
            "R", "G", "B", "H", "S", "V", "H1", "L1", "S1", "# Of Points",
        )
    },
    "Epsilon": pd.Series(["example_value"], dtype="object"),
})
# Sample of the output: a one-element boolean array.
output_sample = np.array([False])
method_sample = StandardPythonParameterType("predict")

# Create the logger before the best-effort telemetry setup so the name is
# always bound, even when telemetry cannot be enabled; init() logs through it.
logger = logging.getLogger('azureml.automl.core.scoring_script')
try:
    log_server.enable_telemetry(INSTRUMENTATION_KEY)
    log_server.set_verbosity('INFO')
except Exception:
    # Telemetry setup stays best-effort: a failure here is ignored. Catching
    # Exception (not a bare except) lets KeyboardInterrupt and SystemExit
    # propagate.
    pass
def init():
    """Load ``model.pkl`` into the module-level ``model`` variable.

    The file is looked up in the directory named by the
    ``AZUREML_MODEL_DIR`` environment variable. The third- and
    second-to-last components of the normalized model path are reported to
    the log server as ``model_name`` and ``model_version``.

    Raises:
        Exception: any error raised while loading the model is logged with
            its traceback and then re-raised unchanged.
    """
    global model
    model_path = os.path.join(os.getenv('AZUREML_MODEL_DIR'), 'model.pkl')
    path_parts = os.path.normpath(model_path).split(os.sep)
    dimensions = {
        'model_name': path_parts[-3],
        'model_version': path_parts[-2],
    }
    log_server.update_custom_dimensions(dimensions)
    try:
        logger.info("Loading model from path.")
        model = joblib.load(model_path)
        logger.info("Loading successful.")
    except Exception as load_error:
        logging_utilities.log_traceback(load_error, logger)
        raise
@input_schema('method', method_sample, convert_to_provided_type=False)
@input_schema('data', PandasParameterType(input_sample))
@output_schema(NumpyParameterType(output_sample))
def run(data, method="predict"):
    """Score ``data`` with the loaded model and return a JSON string.

    Args:
        data: input rows passed straight to the model.
        method: ``"predict"`` (default) or ``"predict_proba"``; selects
            which model method is called.

    Returns:
        ``{"result": [...]}`` serialized as JSON on success, or
        ``{"error": "<message>"}`` serialized as JSON when anything raises,
        including an unsupported ``method``.
    """
    try:
        # Reject unknown methods first; the error is reported via the
        # handler below, like any other failure.
        if method not in ("predict_proba", "predict"):
            raise Exception(f"Invalid predict method argument received ({method})")
        scorer = model.predict_proba if method == "predict_proba" else model.predict
        predictions = scorer(data)
        # DataFrame results are unwrapped to their underlying array so that
        # tolist() yields plain nested lists.
        if isinstance(predictions, pd.DataFrame):
            predictions = predictions.values
        return json.dumps({"result": predictions.tolist()})
    except Exception as err:
        return json.dumps({"error": str(err)})
由于我的评论没有收到回复,我想它已经解决了这个问题。
目前,`predict_data` 是一个只含单个元素的 `np.array`,而这个元素是一个字典。因此,`predict_data` 的形状是 `(1,)`。由于您需要的是各个特征的值,从字典中提取这些值应该可以解决问题:
# 'data' is a list holding one record dict (a list has no .values()), so take
# that record's values in key order to get a 1-D array of the feature values.
predict_data = np.array(list(json.loads(input_data)['data'][0].values()))
这应该会得到一个形状为 `(17,)` 的 `predict_data`。
我使用下面的 DataFrame 作为数据输入,可以正常工作。
# Single-row input frame: float64 measurements first, then int64 values,
# then the object-typed "Epsilon" column.
data1 = pd.DataFrame({
    **{
        column: pd.Series([value], dtype="float64")
        for column, value in (
            ("Width", width),
            ("Length", length),
            ("Ratio", rect_ratio),
            ("Perimter", perimeter),
            ("Area", area),
            ("Angle", angle),
        )
    },
    **{
        column: pd.Series([value], dtype="int64")
        for column, value in (
            ("R", R),
            ("G", G),
            ("B", B),
            ("H", H),
            ("S", S),
            ("V", V),
            ("H1", H1),
            ("L1", L1),
            ("S1", S1),
            ("# Of Points", len(items)),
        )
    },
    "Epsilon": pd.Series([epsilon], dtype="object"),
})
声明:本站的技术帖子网页,遵循CC BY-SA 4.0协议,如果您需要转载,请注明本站网址或者原文地址。任何问题请咨询:yoyou2525@163.com.