I trained the model in Azure using the following Excel file. As you can see, there are 17 input features used for training. After training, I downloaded the model (.pkl) to my local computer and tried to make predictions.
And I used the following code to load the model:
import pickle
import json
# Path of the model file downloaded from Azure.
Pkl_Filename = 'model.pkl'

# SECURITY NOTE: pickle.load executes arbitrary code embedded in the file;
# only unpickle model files that come from a source you trust.
# NOTE(review): the scoring script shown later in this file loads model.pkl
# with joblib.load instead -- confirm which loader matches how it was saved.
with open(Pkl_Filename, 'rb') as file:
    Classifier_Model = pickle.load(file)
# Request payload: a list with one sample (17 named feature values) plus the
# name of the prediction method to call.
data = {
    "data": [
        {
            "Width": 25.99737167,
            "Length": 171.4788666,
            "Ratio": 6.596007809,
            "Perimter": 394.9524765,
            "Area": 4458,
            "Angle": 14.5224762,
            "R": 127,
            "G": 126,
            "B": 129,
            "H": 130,
            "S": 6,
            "V": 129,
            "H1": 130,
            "L1": 128,
            "S1": 3,
            "# Of Points": 36,
            "Epsilon": 2,
        },
    ],
    "method": "predict",
}

# Serialize the payload to a JSON string.
input_data = json.dumps(data)
>>> '{"data": [{"Width": 25.99737167, "Length": 171.4788666, "Ratio": 6.596007809, "Perimter": 394.9524765, "Area": 4458, "Angle": 14.5224762, "R": 127, "G": 126, "B": 129, "H": 130, "S": 6, "V": 129, "H1": 130, "L1": 128, "S1": 3, "# Of Points": 36, "Epsilon": 2}], "method": "predict"}'
# json.loads(...)['data'] is a list holding one dict, so np.array wraps it as a
# 1-element object array (shape (1,)) rather than a row of 17 feature values.
predict_data = np.array(json.loads(input_data)['data'])
>>> array([{'Width': 25.99737167, 'Length': 171.4788666, 'Ratio': 6.596007809, 'Perimter': 394.9524765, 'Area': 4458, 'Angle': 14.5224762, 'R': 127, 'G': 126, 'B': 129, 'H': 130, 'S': 6, 'V': 129, 'H1': 130, 'L1': 128, 'S1': 3, '# Of Points': 36, 'Epsilon': 2}],
dtype=object)
# Call predict on the unpickled model, which this script binds to the name
# Classifier_Model (Hair_Classifier_Model is never defined here).
Classifier_Model.predict(predict_data)
When I run this code, a DataException is raised, and I do not know how to solve it.
Below is the error message:
DataException: DataException:
Message: The number of features in [fitted data](17) does not match with those in [input data](1). Please inspect your data, and make sure that features are aligned in both the Datasets.
InnerException: None
ErrorResponse
{
"error": {
"code": "UserError",
"message": "The number of features in [fitted data](17) does not match with those in [input data](1). Please inspect your data, and make sure that features are aligned in both the Datasets.",
"target": "X",
"inner_error": {
"code": "BadData",
"inner_error": {
"code": "InvalidDimension",
"inner_error": {
"code": "DataShapeMismatch"
}
}
},
"reference_code": "c402b6c2-3870-45a7-8745-c063bd385962"
}
}
I do not know whether the scoring file is useful, but it is shown below:
# ---------------------------------------------------------
# Copyright (c) Microsoft Corporation. All rights reserved.
# ---------------------------------------------------------
import json
import logging
import os
import pickle
import numpy as np
import pandas as pd
import joblib
import azureml.automl.core
from azureml.automl.core.shared import logging_utilities, log_server
from azureml.telemetry import INSTRUMENTATION_KEY
from inference_schema.schema_decorators import input_schema, output_schema
from inference_schema.parameter_types.numpy_parameter_type import NumpyParameterType
from inference_schema.parameter_types.pandas_parameter_type import PandasParameterType
from inference_schema.parameter_types.standard_py_parameter_type import StandardPythonParameterType
# One-row sample frame describing the expected input columns and dtypes:
# six float64 columns, ten int64 columns, and an object-typed "Epsilon".
input_sample = pd.DataFrame(
    {
        **{
            column: pd.Series([0.0], dtype="float64")
            for column in ("Width", "Length", "Ratio", "Perimter", "Area", "Angle")
        },
        **{
            column: pd.Series([0], dtype="int64")
            for column in ("R", "G", "B", "H", "S", "V", "H1", "L1", "S1", "# Of Points")
        },
        "Epsilon": pd.Series(["example_value"], dtype="object"),
    }
)

# Sample of the output: a one-element boolean array.
output_sample = np.array([False])
# Sample value for the 'method' input; passed to the input_schema decorator
# applied to run() below.
method_sample = StandardPythonParameterType("predict")
# Create the logger before the telemetry setup so that it is always defined:
# init() uses it even when telemetry could not be enabled.
logger = logging.getLogger('azureml.automl.core.scoring_script')
try:
    log_server.enable_telemetry(INSTRUMENTATION_KEY)
    log_server.set_verbosity('INFO')
except Exception:
    # Telemetry is best-effort: scoring must still work without it. Only
    # ordinary errors are suppressed (not KeyboardInterrupt/SystemExit).
    logger.debug("Telemetry setup failed; continuing without it.", exc_info=True)
def init():
    """Load model.pkl into the module-level ``model`` global.

    The file is read from the directory named by the AZUREML_MODEL_DIR
    environment variable. Any exception raised while loading is logged
    and re-raised.
    """
    global model
    model_path = os.path.join(os.getenv('AZUREML_MODEL_DIR'), 'model.pkl')

    # The third- and second-to-last components of the normalized path are
    # reported to the log server as the model name and model version.
    path_parts = os.path.normpath(model_path).split(os.sep)
    log_server.update_custom_dimensions(
        {'model_name': path_parts[-3], 'model_version': path_parts[-2]}
    )

    try:
        logger.info("Loading model from path.")
        model = joblib.load(model_path)
        logger.info("Loading successful.")
    except Exception as e:
        logging_utilities.log_traceback(e, logger)
        raise
@input_schema('method', method_sample, convert_to_provided_type=False)
@input_schema('data', PandasParameterType(input_sample))
@output_schema(NumpyParameterType(output_sample))
def run(data, method="predict"):
    """Score ``data`` with the loaded model and return a JSON string.

    ``method`` selects ``model.predict`` ("predict") or
    ``model.predict_proba`` ("predict_proba"). On success the JSON holds a
    "result" list; any exception (including an unknown ``method``) is
    reported as JSON with an "error" message instead of being raised.
    """
    try:
        # Pick the model method first, then call it once.
        if method == "predict_proba":
            predictor = model.predict_proba
        elif method == "predict":
            predictor = model.predict
        else:
            raise Exception(f"Invalid predict method argument received ({method})")

        result = predictor(data)
        # DataFrame results are reduced to their underlying values before
        # being converted to a list.
        if isinstance(result, pd.DataFrame):
            result = result.values
        return json.dumps({"result": result.tolist()})
    except Exception as e:
        return json.dumps({"error": str(e)})
Since there was no reaction to my comment, I assume it solved the issue.
Currently, `predict_data` is an `np.array` with a single entry, which is a dictionary. Thus, the shape of `predict_data` is `(1,)`. Since you want the features, extracting the values from the dictionary should do the trick:
# 'data' is a list of sample dicts, so take the first sample before reading its
# values; list() is needed because np.array does not expand a dict view.
# NOTE(review): the model may expect a 2-D input of shape (1, 17) -- confirm.
predict_data = np.array(list(json.loads(input_data)['data'][0].values()))
This should result in a `predict_data` with shape `(17,)`.
I use this as my data input and it works.
# One-row DataFrame with the 17 feature columns, each given an explicit dtype
# (six float64, ten int64, and object for "Epsilon").
data1 = pd.DataFrame(
    {
        "Width": pd.Series([width], dtype="float64"),
        "Length": pd.Series([length], dtype="float64"),
        "Ratio": pd.Series([rect_ratio], dtype="float64"),
        "Perimter": pd.Series([perimeter], dtype="float64"),
        "Area": pd.Series([area], dtype="float64"),
        "Angle": pd.Series([angle], dtype="float64"),
        "R": pd.Series([R], dtype="int64"),
        "G": pd.Series([G], dtype="int64"),
        "B": pd.Series([B], dtype="int64"),
        "H": pd.Series([H], dtype="int64"),
        "S": pd.Series([S], dtype="int64"),
        "V": pd.Series([V], dtype="int64"),
        "H1": pd.Series([H1], dtype="int64"),
        "L1": pd.Series([L1], dtype="int64"),
        "S1": pd.Series([S1], dtype="int64"),
        "# Of Points": pd.Series([len(items)], dtype="int64"),
        "Epsilon": pd.Series([epsilon], dtype="object"),
    }
)
The technical post webpages of this site follow the CC BY-SA 4.0 protocol. If you need to reprint, please indicate the site URL or the original address.Any question please contact:yoyou2525@163.com.