DataException error appears when using ML model (.pkl) to predict

Question

I trained the model in Azure with the following Excel file. As you can see, there are 17 input features used for training. After the training, I downloaded the model (.pkl) to my local computer and tried to make predictions.

And I used the following code to load the model:

import pickle
import json

# File name of the pickled model; a bare name, so it is resolved against the
# current working directory.
Pkl_Filename = 'model.pkl'

# Deserialize the model object; the `with` block closes the file afterwards.
# NOTE(review): pickle.load can execute arbitrary code during loading, so only
# load model files that come from a trusted source.
with open(Pkl_Filename, 'rb') as file:  
    Classifier_Model = pickle.load(file)

# Request payload: one record carrying the 17 feature values, plus the name
# of the scoring method to call. Integer features are written as plain int
# literals (wrapping an int literal in int() yields the same value).
data = {
    "data": [
        {
            "Width": 25.99737167,
            "Length": 171.4788666,
            "Ratio": 6.596007809,
            "Perimter": 394.9524765,
            "Area": 4458,
            "Angle": 14.5224762,
            "R": 127,
            "G": 126,
            "B": 129,
            "H": 130,
            "S": 6,
            "V": 129,
            "H1": 130,
            "L1": 128,
            "S1": 3,
            "# Of Points": 36,
            "Epsilon": 2,
        },
    ],
    "method": "predict",
}

input_data = json.dumps(data)
>>> '{"data": [{"Width": 25.99737167, "Length": 171.4788666, "Ratio": 6.596007809, "Perimter": 394.9524765, "Area": 4458, "Angle": 14.5224762, "R": 127, "G": 126, "B": 129, "H": 130, "S": 6, "V": 129, "H1": 130, "L1": 128, "S1": 3, "# Of Points": 36, "Epsilon": 2}], "method": "predict"}'

predict_data = np.array(json.loads(input_data)['data'])
>>> array([{'Width': 25.99737167, 'Length': 171.4788666, 'Ratio': 6.596007809, 'Perimter': 394.9524765, 'Area': 4458, 'Angle': 14.5224762, 'R': 127, 'G': 126, 'B': 129, 'H': 130, 'S': 6, 'V': 129, 'H1': 130, 'L1': 128, 'S1': 3, '# Of Points': 36, 'Epsilon': 2}],
      dtype=object)

# Call predict on the object that pickle.load returned; the loading code binds
# it to the name Classifier_Model, so that is the name that must be used here.
Classifier_Model.predict(predict_data)

When I run this code, the DataException Error appears, and I do not know how to solve it.

Below is the error code:

DataException: DataException:
    Message: The number of features in [fitted data](17) does not match with those in [input data](1). Please inspect your data, and make sure that features are aligned in both the Datasets.
    InnerException: None
    ErrorResponse 
{
    "error": {
        "code": "UserError",
        "message": "The number of features in [fitted data](17) does not match with those in [input data](1). Please inspect your data, and make sure that features are aligned in both the Datasets.",
        "target": "X",
        "inner_error": {
            "code": "BadData",
            "inner_error": {
                "code": "InvalidDimension",
                "inner_error": {
                    "code": "DataShapeMismatch"
                }
            }
        },
        "reference_code": "c402b6c2-3870-45a7-8745-c063bd385962"
    }
}

I do not know whether the scoring file is useful, but it is shown below:

# ---------------------------------------------------------
# Copyright (c) Microsoft Corporation. All rights reserved.
# ---------------------------------------------------------
import json
import logging
import os
import pickle
import numpy as np
import pandas as pd
import joblib

import azureml.automl.core
from azureml.automl.core.shared import logging_utilities, log_server
from azureml.telemetry import INSTRUMENTATION_KEY

from inference_schema.schema_decorators import input_schema, output_schema
from inference_schema.parameter_types.numpy_parameter_type import NumpyParameterType
from inference_schema.parameter_types.pandas_parameter_type import PandasParameterType
from inference_schema.parameter_types.standard_py_parameter_type import StandardPythonParameterType

# Example input/output values handed to the schema decorators on run().
# input_sample is a single placeholder row: six float64 columns, ten int64
# columns, and an object-typed "Epsilon" column, in that order.
input_sample = pd.DataFrame(
    {
        **{
            column: pd.Series([0.0], dtype="float64")
            for column in ("Width", "Length", "Ratio", "Perimter", "Area", "Angle")
        },
        **{
            column: pd.Series([0], dtype="int64")
            for column in ("R", "G", "B", "H", "S", "V", "H1", "L1", "S1", "# Of Points")
        },
        "Epsilon": pd.Series(["example_value"], dtype="object"),
    }
)
output_sample = np.array([False])
# Sample value for the 'method' argument of run(), wrapped in
# StandardPythonParameterType so it can be passed to the input schema decorator.
method_sample = StandardPythonParameterType("predict")

# Create the logger before touching telemetry so that `logger` is always
# defined for init(), even when telemetry setup fails.
logger = logging.getLogger('azureml.automl.core.scoring_script')

# Telemetry is best-effort: a failure here must not stop the scoring script
# from loading, but it is reported instead of being silently discarded.
try:
    log_server.enable_telemetry(INSTRUMENTATION_KEY)
    log_server.set_verbosity('INFO')
except Exception:
    logger.warning("Telemetry setup failed; continuing without it.", exc_info=True)


def init():
    """Load ``model.pkl`` into the module-level ``model`` global.

    The file is looked up inside the directory named by the
    ``AZUREML_MODEL_DIR`` environment variable. The third-from-last and
    second-from-last components of the normalized model path are reported to
    ``log_server`` as ``model_name`` and ``model_version``.

    Raises:
        Exception: any error raised while loading the model is logged via
            ``logging_utilities.log_traceback`` and then re-raised.
    """
    global model
    model_file = os.path.join(os.getenv('AZUREML_MODEL_DIR'), 'model.pkl')

    # Only the path components are needed for the logging dimensions; the
    # model itself is loaded from the un-normalized path below.
    segments = os.path.normpath(model_file).split(os.sep)
    log_server.update_custom_dimensions(
        {'model_name': segments[-3], 'model_version': segments[-2]}
    )

    try:
        logger.info("Loading model from path.")
        model = joblib.load(model_file)
        logger.info("Loading successful.")
    except Exception as e:
        logging_utilities.log_traceback(e, logger)
        raise

@input_schema('method', method_sample, convert_to_provided_type=False)
@input_schema('data', PandasParameterType(input_sample))
@output_schema(NumpyParameterType(output_sample))
def run(data, method="predict"):
    """Score ``data`` with the loaded model and return a JSON string.

    Args:
        data: Input passed straight to the model's prediction method.
        method: ``"predict"`` (default) or ``"predict_proba"``; selects which
            model method is called.

    Returns:
        A JSON string. On success it has a ``"result"`` key holding the
        prediction converted to a list; on any exception (including an
        unrecognized ``method``) it has an ``"error"`` key holding the
        exception message.
    """
    try:
        if method not in ("predict_proba", "predict"):
            raise Exception(f"Invalid predict method argument received ({method})")
        predictor = model.predict_proba if method == "predict_proba" else model.predict
        output = predictor(data)
        # A DataFrame result is reduced to its underlying array before
        # being turned into a list.
        payload = output.values if isinstance(output, pd.DataFrame) else output
        return json.dumps({"result": payload.tolist()})
    except Exception as err:
        return json.dumps({"error": str(err)})

Answer 1

Since there was no reaction on my comment, I assume it solved the issue.

Currently, predict_data is a np.array with a single entry, which is a dictionary. Thus, the shape of predict_data is (1,) . Since you want the features, extracting the values from the dictionary should do the trick:

# json.loads(...)['data'] is a list with one dict per record, so pick the
# first record before asking for its values. The values view is turned into a
# list because np.array would otherwise wrap the view as a single object
# instead of producing one element per feature.
predict_data = np.array(list(json.loads(input_data)['data'][0].values()))

This should result in a predict_data with shape (17,) .

Answer 2

I use this as my data input and it works.

# One-row DataFrame whose column names, order and dtypes match the
# input_sample defined in the scoring script.
data1 = pd.DataFrame(
    {
        "Width": pd.Series([width], dtype="float64"),
        "Length": pd.Series([length], dtype="float64"),
        "Ratio": pd.Series([rect_ratio], dtype="float64"),
        "Perimter": pd.Series([perimeter], dtype="float64"),
        "Area": pd.Series([area], dtype="float64"),
        "Angle": pd.Series([angle], dtype="float64"),
        "R": pd.Series([R], dtype="int64"),
        "G": pd.Series([G], dtype="int64"),
        "B": pd.Series([B], dtype="int64"),
        "H": pd.Series([H], dtype="int64"),
        "S": pd.Series([S], dtype="int64"),
        "V": pd.Series([V], dtype="int64"),
        "H1": pd.Series([H1], dtype="int64"),
        "L1": pd.Series([L1], dtype="int64"),
        "S1": pd.Series([S1], dtype="int64"),
        "# Of Points": pd.Series([len(items)], dtype="int64"),
        "Epsilon": pd.Series([epsilon], dtype="object"),
    }
)

DataException error appears when using ML model (.pkl) to predict

Question

2 answers

solution1
0 2021-11-24 15:17:25

solution2
0 2021-11-26 07:50:31

DataException error appears when using ML model (.pkl) to predict

Question

2 answers

solution1 0 2021-11-24 15:17:25

solution2 0 2021-11-26 07:50:31

solution1
0 2021-11-24 15:17:25

solution2
0 2021-11-26 07:50:31