![](/img/trans.png)
[英]Making a scalable postgres source machine learning pipeline on GCP
[英]Making a pipeline for machine learning models
from sklearn import svm
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
# Classifiers to tune, keyed by a short label. Each entry holds the estimator
# ('model') and the GridSearchCV parameter grid ('params') to search for it.
# Grid keys have the form '<step name>__<parameter>', where the step name is
# the one make_pipeline() generates: the estimator's class name in lowercase.
model_params = {
    'svm': {
        'model': svm.SVC(gamma='auto'),
        'params': {
            'svc__C': [1, 10, 100, 1000],
            'svc__kernel': ['rbf', 'linear'],
        },
    },
    'logistic_regression': {
        'model': LogisticRegression(solver='liblinear', multi_class='auto'),
        'params': {
            'logisticregression__C': [1, 5, 10],
        },
    },
    'random_forest1': {
        'model': RandomForestClassifier(),
        'params': {
            'randomforestclassifier__n_estimators': [1, 5, 10],
        },
    },
    'decision_tree': {
        'model': DecisionTreeClassifier(),
        'params': {
            # The step name must be all lowercase. The mixed-case spelling
            # 'decisionTreeClassifier__criterion' makes GridSearchCV fail
            # with "Invalid parameter", because no pipeline step has that
            # name.
            'decisiontreeclassifier__criterion': ["gini", "entropy", "log_loss"],
        },
    },
}
from sklearn.model_selection import GridSearchCV
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
# Run a grid search for every classifier in model_params and collect the
# results. `features` and `target` are not defined in this snippet; they are
# expected to exist before this point.
scores = []  # one summary dict per algorithm
best_estimators = {}  # algorithm label -> best pipeline found by the search
import pandas as pd
for algo, mp in model_params.items():
    # Build a pipeline that scales the data and then applies the classifier
    # fetched from the dictionary.
    pipe = make_pipeline(StandardScaler(), mp['model'])
    # Grid-search this classifier's parameter grid with 5-fold
    # cross-validation.
    clf = GridSearchCV(pipe, mp['params'], cv=5, return_train_score=False)
    clf.fit(features, target)
    scores.append({
        'model': algo,
        'best_score': clf.best_score_,
        'best_params': clf.best_params_,
    })
    best_estimators[algo] = clf.best_estimator_

# Summary table built once all searches have finished.
df = pd.DataFrame(scores, columns=['model', 'best_score', 'best_params'])
错误:
Invalid parameter 'decisionTreeClassifier' for estimator Pipeline(steps=[('standardscaler', StandardScaler()),
('decision_tree', DecisionTreeClassifier())]). Valid parameters are: ['memory', 'steps', 'verbose'].
该代码适用于 SVM、逻辑回归和随机森林分类器,但会为决策树分类器抛出参数错误。无法弄清楚这是语法问题还是其他问题。
它应该是 decisiontreeclassifier__criterion。
make_pipeline() 会将每个步骤的名称设置为对应估计器类型名的全小写形式
( https://scikit-learn.org/stable/modules/generated/sklearn.pipeline.make_pipeline.html )。
声明:本站的技术帖子网页,遵循CC BY-SA 4.0协议,如果您需要转载,请注明本站网址或者原文地址。任何问题请咨询:yoyou2525@163.com.