I want to create an endpoint for a scikit-learn logistic regression model in SageMaker. My scikit-learn code lives in a file called scikitlogistic.py.
The code below is the content of scikitlogistic.py:
import subprocess as sb
import pandas as pd
import numpy as np
import pickle,json
import sys
def install(package):
    """Install *package* with pip into the environment of the running interpreter.

    Args:
        package: Requirement specifier handed verbatim to ``pip install``.

    Raises:
        subprocess.CalledProcessError: If pip exits with a non-zero status.
    """
    # Invoke pip through sys.executable so the package lands in the same
    # environment as the interpreter executing this script.
    # check_call (instead of call) makes a failed install fail loudly here
    # rather than as a confusing ImportError later on.
    sb.check_call([sys.executable, "-m", "pip", "install", package])
# Install s3fs when the module is loaded — presumably so that pd.read_csv can
# open the s3:// URL built below (TODO confirm).
install('s3fs')
# Script entry point: build the S3 locations, parse the arguments, fit a
# LogisticRegression and persist it.
# NOTE(review): the indentation of this snippet was lost, so the extent of this
# guard is unclear (a second guard appears further down).
# NOTE(review): bucket, prefix, os, argparse and boto3 are not defined or
# imported in the visible part of the file — confirm they exist elsewhere.
if __name__ =='__main__':
train_data = 's3://{}/{}/{}'.format(bucket, prefix, 'train') #train_data
outputlocation = 's3://{}/{}/{}'.format(bucket, prefix, 'logistic_scikit')
# These assignments overwrite the variables for this process. The job log shows
# the container presets SM_MODEL_DIR=/opt/ml/model and SM_OUTPUT_DIR=/opt/ml/output.
os.environ["Train"]=train_data
os.environ["SM_MODEL_DIR"]='s3://<bucket>/<prefix>/model_dir'
os.environ["SM_OUTPUT_DIR"]=outputlocation
parser = argparse.ArgumentParser()
# hyperparameters sent by the client are passed as command-line arguments to the script.
parser.add_argument('--solver', type=str, default='liblinear')
# Data, model, and output directories
parser.add_argument('--output_data_dir', type=str, default=os.environ.get('SM_OUTPUT_DIR'))
parser.add_argument('--model_dir', type=str, default=os.environ.get('SM_MODEL_DIR'))
# NOTE(review): the job log lists SM_CHANNEL_TRAIN=/opt/ml/input/data/train for
# the 'train' channel; 'Train' only exists if the assignment above has run.
parser.add_argument('--train', type=str, default=os.environ.get('Train'))
# parse_known_args ignores any extra arguments instead of failing on them.
args, _ = parser.parse_known_args()
# The reported ValueError is raised on the next line when args.train is None.
# NOTE(review): the traceback points at line 101 of the deployed file, so that
# file presumably differs from this excerpt — verify.
df = pd.read_csv(args.train)#writing my data into pandas data frame
# 'Class' is the target column; every other column is used as a feature.
y = df.Class
X = df.drop('Class', axis=1)
solver = args.solver
from sklearn.linear_model import LogisticRegression
# NOTE(review): X_train / y_train are not defined in the visible code (only X
# and y are) — TODO confirm which was intended.
lr = LogisticRegression(solver=solver).fit(X_train, y_train)
# Write a placeholder object (literal bytes b'abc') to <prefix>/model_dir/model.joblib.
# NOTE(review): `object` shadows the Python builtin of the same name.
s3 = boto3.resource('s3')
object = s3.Object(<bucket>,"<prefix>/model_dir/model.joblib")
object.put(Body=b'abc')
# NOTE(review): newer scikit-learn releases no longer ship sklearn.externals.joblib;
# verify against the version installed in the container.
from sklearn.externals import joblib
if __name__=='__main__':
# Serialises the fitted model to a notebook-instance path, not to args.model_dir.
joblib.dump(lr, '/home/ec2-user/SageMaker/<notebookinstancename>/model.joblib')
In my Jupyter notebook I have written the following code to train the scikit-learn logistic regression model:
# Notebook-side setup for the training job.
# <role>, <bucket> and <prefix> are placeholders for the real values.
# NOTE(review): boto3 and sagemaker must already be imported in the notebook —
# the imports are not shown here.
role = <role>
region = boto3.Session().region_name
bucket = <bucket>
prefix = <prefix>
# S3 location of the training data, wrapped as a text/csv input channel.
train_data = 's3://{}/{}/{}'.format(bucket, prefix, 'train')
train_channel = sagemaker.session.s3_input(train_data, content_type='text/csv')
# S3 location handed to the estimator as output_path.
output_path = 's3://{}/{}/{}'.format(bucket, prefix,'output_data_dir')
from sagemaker.sklearn.estimator import SKLearn
# Estimator that runs scikitlogistic.py on a single ml.c4.xlarge instance and
# passes solver=liblinear as a hyperparameter.
# NOTE(review): the variable name `sklearn` would shadow the scikit-learn
# package if that is imported in the same notebook.
sklearn = SKLearn(
entry_point='scikitlogistic.py',
train_instance_type="ml.c4.xlarge",
role=role, train_instance_count=1,
sagemaker_session=sagemaker.Session(),output_path=output_path,
hyperparameters={'solver':'liblinear'})
Now I'm fitting it
# Launch the training job, supplying the text/csv S3 input as the 'train' channel.
sklearn.fit({'train': train_channel})
When I try to fit the model, it throws ValueError: Invalid file path or buffer object type: <class 'NoneType'> at the line df = pd.read_csv(args.train) in scikitlogistic.py. The full training log and traceback follow.
2019-12-04 12:31:17 Starting - Starting the training job...
2019-12-04 12:31:18 Starting - Launching requested ML instances......
2019-12-04 12:32:25 Starting - Preparing the instances for training...
2019-12-04 12:33:11 Downloading - Downloading input data...
2019-12-04 12:33:41 Training - Downloading the training image..
2019-12-04 12:34:16 Uploading - Uploading generated training model
2019-12-04 12:34:16 Failed - Training job failed
2019-12-04 12:34:01,194 sagemaker-containers INFO Imported framework sagemaker_sklearn_container.training
2019-12-04 12:34:01,196 sagemaker-containers INFO No GPUs detected (normal if no gpus installed)
2019-12-04 12:34:01,206 sagemaker_sklearn_container.training INFO Invoking user training script.
2019-12-04 12:34:03,100 sagemaker-containers INFO Module scikitlogistic does not provide a setup.py.
Generating setup.py
2019-12-04 12:34:03,101 sagemaker-containers INFO Generating setup.cfg
2019-12-04 12:34:03,101 sagemaker-containers INFO Generating MANIFEST.in
2019-12-04 12:34:03,101 sagemaker-containers INFO Installing module with the following command:
/miniconda3/bin/python -m pip install .
Processing /opt/ml/code
Building wheels for collected packages: scikitlogistic
Building wheel for scikitlogistic (setup.py): started
Building wheel for scikitlogistic (setup.py): finished with status 'done'
Created wheel for scikitlogistic: filename=scikitlogistic-1.0.0-py2.py3-none-any.whl size=7186 sha256=3a209b33ea1fba4843ad74565d16764ebc3c18b5c0b68ea4e7aa0fe4e31960c0
Stored in directory: /tmp/pip-ephem-wheel-cache-rh9bcece/wheels/35/24/16/37574d11bf9bde50616c67372a334f94fa8356bc7164af8ca3
Successfully built scikitlogistic
Installing collected packages: scikitlogistic
Successfully installed scikitlogistic-1.0.0
2019-12-04 12:34:04,479 sagemaker-containers INFO No GPUs detected (normal if no gpus installed)
2019-12-04 12:34:04,490 sagemaker-containers INFO Invoking user script
Training Env:
{
"additional_framework_parameters": {},
"channel_input_dirs": {
"train": "/opt/ml/input/data/train"
},
"current_host": "algo-1",
"framework_module": "sagemaker_sklearn_container.training:main",
"hosts": [
"algo-1"
],
"hyperparameters": {
"solver": "liblinear"
},
"input_config_dir": "/opt/ml/input/config",
"input_data_config": {
"train": {
"TrainingInputMode": "File",
"S3DistributionType": "FullyReplicated",
"RecordWrapperType": "None"
}
},
"input_dir": "/opt/ml/input",
"is_master": true,
"job_name": "sagemaker-scikit-learn-2019-12-04-12-31-17-192",
"log_level": 20,
"master_hostname": "algo-1",
"model_dir": "/opt/ml/model",
"module_dir": "s3://sagemaker2222/sagemaker-scikit-learn-2019-12-04-12-31-17-192/source/sourcedir.tar.gz",
"module_name": "scikitlogistic",
"network_interface_name": "eth0",
"num_cpus": 4,
"num_gpus": 0,
"output_data_dir": "/opt/ml/output/data",
"output_dir": "/opt/ml/output",
"output_intermediate_dir": "/opt/ml/output/intermediate",
"resource_config": {
"current_host": "algo-1",
"hosts": [
"algo-1"
],
"network_interface_name": "eth0"
},
"user_entry_point": "scikitlogistic.py"
}
Environment variables:
SM_HOSTS=["algo-1"]
SM_NETWORK_INTERFACE_NAME=eth0
SM_HPS={"solver":"liblinear"}
SM_USER_ENTRY_POINT=scikitlogistic.py
SM_FRAMEWORK_PARAMS={}
SM_RESOURCE_CONFIG={"current_host":"algo-1","hosts":["algo-1"],"network_interface_name":"eth0"}
SM_INPUT_DATA_CONFIG={"train":{"RecordWrapperType":"None","S3DistributionType":"FullyReplicated","TrainingInputMode":"File"}}
SM_OUTPUT_DATA_DIR=/opt/ml/output/data
SM_CHANNELS=["train"]
SM_CURRENT_HOST=algo-1
SM_MODULE_NAME=scikitlogistic
SM_LOG_LEVEL=20
SM_FRAMEWORK_MODULE=sagemaker_sklearn_container.training:main
SM_INPUT_DIR=/opt/ml/input
SM_INPUT_CONFIG_DIR=/opt/ml/input/config
SM_OUTPUT_DIR=/opt/ml/output
SM_NUM_CPUS=4
SM_NUM_GPUS=0
SM_MODEL_DIR=/opt/ml/model
SM_MODULE_DIR=s3://sagemaker2222/sagemaker-scikit-learn-2019-12-04-12-31-17-192/source/sourcedir.tar.gz
SM_TRAINING_ENV={"additional_framework_parameters":{},"channel_input_dirs":{"train":"/opt/ml/input/data/train"},"current_host":"algo-1","framework_module":"sagemaker_sklearn_container.training:main","hosts":["algo-1"],"hyperparameters":{"solver":"liblinear"},"input_config_dir":"/opt/ml/input/config","input_data_config":{"train":{"RecordWrapperType":"None","S3DistributionType":"FullyReplicated","TrainingInputMode":"File"}},"input_dir":"/opt/ml/input","is_master":true,"job_name":"sagemaker-scikit-learn-2019-12-04-12-31-17-192","log_level":20,"master_hostname":"algo-1","model_dir":"/opt/ml/model","module_dir":"s3://sagemaker2222/sagemaker-scikit-learn-2019-12-04-12-31-17-192/source/sourcedir.tar.gz","module_name":"scikitlogistic","network_interface_name":"eth0","num_cpus":4,"num_gpus":0,"output_data_dir":"/opt/ml/output/data","output_dir":"/opt/ml/output","output_intermediate_dir":"/opt/ml/output/intermediate","resource_config":{"current_host":"algo-1","hosts":["algo-1"],"network_interface_name":"eth0"},"user_entry_point":"scikitlogistic.py"}
SM_USER_ARGS=["--solver","liblinear"]
SM_OUTPUT_INTERMEDIATE_DIR=/opt/ml/output/intermediate
SM_CHANNEL_TRAIN=/opt/ml/input/data/train
SM_HP_SOLVER=liblinear
PYTHONPATH=/miniconda3/bin:/miniconda3/lib/python37.zip:/miniconda3/lib/python3.7:/miniconda3/lib/python3.7/lib-dynload:/miniconda3/lib/python3.7/site-packages
Invoking script with the following command:
/miniconda3/bin/python -m scikitlogistic --solver liblinear
Collecting s3fs
Downloading https://files.pythonhosted.org/packages/72/5c/ec84c7ec49fde2c3b0d885ecae4504fa40fc77fef7684e9f2939c50f9b94/s3fs-0.4.0-py3-none-any.whl
Requirement already satisfied: boto3>=1.9.91 in /miniconda3/lib/python3.7/site-packages (from s3fs) (1.10.6)
Collecting fsspec>=0.6.0
Downloading https://files.pythonhosted.org/packages/04/1e/6108c48f2d4ad9ef1a6bff01fb58245c009f37b2bd0505ec6d0f55cc326d/fsspec-0.6.1-py3-none-any.whl (62kB)
Requirement already satisfied: botocore>=1.12.91 in /miniconda3/lib/python3.7/site-packages (from s3fs) (1.13.6)
Requirement already satisfied: s3transfer<0.3.0,>=0.2.0 in /miniconda3/lib/python3.7/site-packages (from boto3>=1.9.91->s3fs) (0.2.1)
Requirement already satisfied: jmespath<1.0.0,>=0.7.1 in /miniconda3/lib/python3.7/site-packages (from boto3>=1.9.91->s3fs) (0.9.4)
Requirement already satisfied: python-dateutil<3.0.0,>=2.1; python_version >= "2.7" in /miniconda3/lib/python3.7/site-packages (from botocore>=1.12.91->s3fs) (2.8.0)
Requirement already satisfied: docutils<0.16,>=0.10 in /miniconda3/lib/python3.7/site-packages (from botocore>=1.12.91->s3fs) (0.15.2)
Requirement already satisfied: urllib3<1.26,>=1.20; python_version >= "3.4" in /miniconda3/lib/python3.7/site-packages (from botocore>=1.12.91->s3fs) (1.24.2)
Requirement already satisfied: six>=1.5 in /miniconda3/lib/python3.7/site-packages (from python-dateutil<3.0.0,>=2.1; python_version >= "2.7"->botocore>=1.12.91->s3fs) (1.12.0)
Installing collected packages: fsspec, s3fs
Successfully installed fsspec-0.6.1 s3fs-0.4.0
Traceback (most recent call last):
File "/miniconda3/lib/python3.7/runpy.py", line 193, in _run_module_as_main
"__main__", mod_spec)
File "/miniconda3/lib/python3.7/runpy.py", line 85, in _run_code
exec(code, run_globals)
File "/opt/ml/code/scikitlogistic.py", line 101, in <module>
df = pd.read_csv(args.train)
File "/miniconda3/lib/python3.7/site-packages/pandas/io/parsers.py", line 685, in parser_f
return _read(filepath_or_buffer, kwds)
File "/miniconda3/lib/python3.7/site-packages/pandas/io/parsers.py", line 440, in _read
filepath_or_buffer, encoding, compression
File "/miniconda3/lib/python3.7/site-packages/pandas/io/common.py", line 224, in get_filepath_or_buffer
raise ValueError(msg.format(_type=type(filepath_or_buffer)))
ValueError: Invalid file path or buffer object type: <class 'NoneType'>
2019-12-04 12:34:06,008 sagemaker-containers ERROR ExecuteUserScriptError:
Command "/miniconda3/bin/python -m scikitlogistic --solver liblinear"
---------------------------------------------------------------------------
UnexpectedStatusException Traceback (most recent call last)
<ipython-input-66-bfad3082f107> in <module>()
----> 1 sklearn.fit({'train': train_data})
~/anaconda3/envs/python3/lib/python3.6/site-packages/sagemaker/estimator.py in fit(self, inputs, wait, logs, job_name)
339 self.latest_training_job = _TrainingJob.start_new(self, inputs)
340 if wait:
--> 341 self.latest_training_job.wait(logs=logs)
342
343 def _compilation_job_name(self):
~/anaconda3/envs/python3/lib/python3.6/site-packages/sagemaker/estimator.py in wait(self, logs)
902 """
903 if logs:
--> 904 self.sagemaker_session.logs_for_job(self.job_name, wait=True)
905 else:
906 self.sagemaker_session.wait_for_job(self.job_name)
~/anaconda3/envs/python3/lib/python3.6/site-packages/sagemaker/session.py in logs_for_job(self, job_name, wait, poll)
1513
1514 if wait:
-> 1515 self._check_job_status(job_name, description, "TrainingJobStatus")
1516 if dot:
1517 print()
~/anaconda3/envs/python3/lib/python3.6/site-packages/sagemaker/session.py in _check_job_status(self, job, desc, status_key_name)
1154 ),
1155 allowed_statuses=["Completed", "Stopped"],
-> 1156 actual_status=status,
1157 )
1158
UnexpectedStatusException: Error for Training job sagemaker-scikit-learn-2019-12-04-12-31-17-192: Failed. Reason: AlgorithmError: ExecuteUserScriptError:
Command "/miniconda3/bin/python -m scikitlogistic --solver liblinear"
I'm sure that the path of the file is valid, and I'm not sure what is causing the error. Can someone please tell me what mistake I might have made in scikitlogistic.py?
Also, I couldn't find good resources about deploying scikit-learn models in SageMaker other than the AWS docs. Can someone point me to some good resources?
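The S3 input channels are exposed inside the training container as environment variables named SM_CHANNEL_{CHANNEL NAME}
(see the SageMaker Python SDK documentation); for this job the log above shows SM_CHANNEL_TRAIN=/opt/ml/input/data/train.
So I suggest you try replacing os.environ.get('Train')
with os.environ.get('SM_CHANNEL_TRAIN'). Note that this value is a directory (it is listed under channel_input_dirs), so the CSV file name has to be appended to it before it is passed to pd.read_csv.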
The technical post webpages of this site follow the CC BY-SA 4.0 protocol. If you need to reprint, please indicate the site URL or the original address.Any question please contact:yoyou2525@163.com.