[英]how to properly initialize a child class of XGBRegressor?
我想基于 XGBoost 的 scikit-learn 包装器类 XGBRegressor 构建一个分位数回归器。我有以下两个版本:第二个版本只是第一个版本的精简版,但它不再能正常工作。
我想知道为什么我需要将 XGBRegressor 的每个参数都放在其子类的初始化中? 如果我只想获取除 max_depth 之外的所有默认参数值怎么办?
(我的 XGBoost 是 1.4.2 版本。)
No.1 按预期工作的完整版本:
class XGBoostQuantileRegressor(XGBRegressor):
    """Quantile regressor built on XGBRegressor.

    Uses a smooth log-cosh approximation of the pinball (quantile) loss as a
    custom objective.  ``quant_alpha`` is the target quantile; every other
    parameter mirrors an ``XGBRegressor`` constructor parameter so that
    ``sklearn.base.clone`` can rebuild the estimator from ``get_params()``.
    """

    def __init__(
        self, quant_alpha, n_estimators=100, max_depth=3, base_score=0.5, gpu_id=None,
        booster='gbtree', colsample_bylevel=1, colsample_bynode=1, colsample_bytree=1,
        gamma=0, importance_type=None, interaction_constraints=None, n_jobs=1,
        random_state=0, tree_method='auto', missing=1, objective='reg:linear',
        learning_rate=0.1, max_delta_step=0, min_child_weight=1,
        monotone_constraints=None, num_parallel_tree=1, reg_alpha=0, reg_lambda=1,
        scale_pos_weight=1, subsample=0.5, validate_parameters=False, verbosity=0,
    ):
        self.quant_alpha = quant_alpha
        # BUG FIX: the original passed the literal ``missing=1`` to the parent,
        # silently ignoring the ``missing`` argument; forward the parameter.
        super().__init__(
            n_jobs=n_jobs, random_state=random_state, tree_method=tree_method,
            missing=missing, n_estimators=n_estimators, max_depth=max_depth,
            objective=objective, base_score=base_score, booster=booster,
            colsample_bylevel=colsample_bylevel, colsample_bynode=colsample_bynode,
            colsample_bytree=colsample_bytree, gamma=gamma, gpu_id=gpu_id,
            importance_type=importance_type, learning_rate=learning_rate,
            interaction_constraints=interaction_constraints, max_delta_step=max_delta_step,
            min_child_weight=min_child_weight, monotone_constraints=monotone_constraints,
            num_parallel_tree=num_parallel_tree, reg_alpha=reg_alpha, reg_lambda=reg_lambda,
            scale_pos_weight=scale_pos_weight, validate_parameters=validate_parameters,
            verbosity=verbosity, subsample=subsample)

    def fit(self, X, y):
        """Fit with the quantile-weighted log-cosh objective.

        The custom objective is swapped in via ``set_params`` only at fit time,
        so ``get_params()`` still reports the plain constructor value — this is
        what keeps ``sklearn.base.clone`` working on unfitted estimators.
        """
        super().set_params(
            objective=partial(XGBoostQuantileRegressor.log_cosh_loss, alpha=self.quant_alpha))
        super().fit(X, y)
        return self

    def predict(self, X):
        """Delegate to XGBRegressor.predict (kept for symmetry with fit)."""
        return super().predict(X)

    @staticmethod
    def log_cosh_loss(y_true, y_pred, alpha):
        """Return (gradient, hessian) of the log-cosh quantile surrogate loss.

        Residuals below the target are scaled by ``alpha`` and those above by
        ``1 - alpha``, which steers the fit toward the requested quantile.
        """
        err = y_pred - y_true
        err = np.where(err < 0, alpha * err, (1 - alpha) * err)
        grad = np.tanh(err)
        hess = 1 / np.cosh(err) ** 2
        return grad, hess
No.2 精简后不再能工作的版本:
class XGBoostQuantileRegressor(XGBRegressor):
    # NOTE(review): this pruned version breaks sklearn.base.clone.  clone()
    # reads get_params() (which XGBRegressor populates with every XGBoost
    # parameter, e.g. 'objective') and calls __init__(**params); since this
    # __init__ only accepts quant_alpha and max_depth, clone raises
    # "TypeError: __init__() got an unexpected keyword argument 'objective'".
    def __init__(self, quant_alpha, max_depth=3):
        self.quant_alpha = quant_alpha
        super().__init__(max_depth=max_depth)

    def fit(self, X, y):
        # Install the quantile objective only at fit time.
        super().set_params(
            objective=partial(XGBoostQuantileRegressor.log_cosh_loss, alpha=self.quant_alpha))
        super().fit(X, y)
        return self

    def predict(self, X):
        # Pure delegation to XGBRegressor.predict.
        return super().predict(X)

    @staticmethod
    def log_cosh_loss(y_true, y_pred, alpha):
        # Gradient/hessian of a log-cosh surrogate for the pinball loss:
        # residuals below target weighted by alpha, above by (1 - alpha).
        err = y_pred - y_true
        err = np.where(err < 0, alpha * err, (1 - alpha) * err)
        grad = np.tanh(err)
        hess = 1 / np.cosh(err)**2
        return grad, hess
这是回溯:
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
/temp.py in <module>
230 z1 = xgboost_quantile_regressor(alpha=0.95)[0][1]
----> 231 z1.fit(x_trainval, y_trainval)
232 pred_y1 = z1.predict(x_trainval)
233
234 z2 = xgboost_quantile_regressor(alpha=0.05)[0][1]
/temp.py in fit(self, X, y)
~/.local/lib/python3.9/site-packages/optuna/integration/sklearn.py in fit(self, X, y, groups, **fit_params)
873 )
874
--> 875 self.study_.optimize(
876 objective, n_jobs=self.n_jobs, n_trials=self.n_trials, timeout=self.timeout
877 )
~/.local/lib/python3.9/site-packages/optuna/study/study.py in optimize(self, func, n_trials, timeout, n_jobs, catch, callbacks, gc_after_trial, show_progress_bar)
398 )
399
--> 400 _optimize(
401 study=self,
402 func=func,
~/.local/lib/python3.9/site-packages/optuna/study/_optimize.py in _optimize(study, func, n_trials, timeout, n_jobs, catch, callbacks, gc_after_trial, show_progress_bar)
64 try:
65 if n_jobs == 1:
---> 66 _optimize_sequential(
67 study,
68 func,
~/.local/lib/python3.9/site-packages/optuna/study/_optimize.py in _optimize_sequential(study, func, n_trials, timeout, catch, callbacks, gc_after_trial, reseed_sampler_rng, time_start, progress_bar)
161
162 try:
--> 163 trial = _run_trial(study, func, catch)
164 except Exception:
165 raise
~/.local/lib/python3.9/site-packages/optuna/study/_optimize.py in _run_trial(study, func, catch)
262
263 if state == TrialState.FAIL and func_err is not None and not isinstance(func_err, catch):
--> 264 raise func_err
265 return trial
266
~/.local/lib/python3.9/site-packages/optuna/study/_optimize.py in _run_trial(study, func, catch)
211
212 try:
--> 213 value_or_values = func(trial)
214 except exceptions.TrialPruned as e:
215 # TODO(mamu): Handle multi-objective cases.
~/.local/lib/python3.9/site-packages/optuna/integration/sklearn.py in __call__(self, trial)
219 def __call__(self, trial: Trial) -> float:
220
--> 221 estimator = clone(self.estimator)
222 params = self._get_params(trial)
223
~/.local/lib/python3.9/site-packages/sklearn/base.py in clone(estimator, safe)
80 for name, param in new_object_params.items():
81 new_object_params[name] = clone(param, safe=False)
---> 82 new_object = klass(**new_object_params)
83 params_set = new_object.get_params(deep=False)
84
TypeError: __init__() got an unexpected keyword argument 'objective'
我不是 scikit-learn 的专家,但该框架对其使用的各种对象的要求之一,似乎是它们必须可以通过调用 sklearn.base.clone 方法来克隆。现有的 XGBRegressor 类似乎满足这一点,您的 XGBRegressor 子类也必须满足。
一种可行的办法是通过 **kwargs 参数接收任何其他意外的关键字参数。在您的构造函数中,kwargs 将是一个字典,包含所有未被分配给其他构造函数参数的关键字参数。您可以在调用超类构造函数时再次以 **kwargs 的形式传入这个字典,Python 会将其展开:
class XGBoostQuantileRegressor(XGBRegressor):
    # Accepting **kwargs and forwarding them to the parent lets
    # sklearn.base.clone re-create the estimator: clone() passes every key
    # from get_params() back into __init__, and the extra keys (e.g.
    # 'objective') are now absorbed by **kwargs instead of raising TypeError.
    def __init__(self, quant_alpha, max_depth=3, **kwargs):
        self.quant_alpha = quant_alpha
        super().__init__(max_depth=max_depth, **kwargs)
    # other methods unchanged and omitted for brevity.
我之前已经回答了您的一个问题,我将在此重申我在该答案中提出的两点。
首先,我不是数据科学家。 我以前从未使用过 scikit-learn,所以我没有测试我上面发布的代码。
其次,这又是一种我认为您应该优先选择组合而非继承的情况。您选择了继承,并正因为这个选择而遇到了问题。如果您的类不从 XGBRegressor 继承,而是简单地创建一个 XGBRegressor 并把它存储在一个属性中(例如 self.xgb_regressor = XGBRegressor(max_depth=max_depth)),并让 predict 和 fit 方法分别调用 self.xgb_regressor.predict 和 self.xgb_regressor.fit,您就不会遇到这个问题。
声明:本站的技术帖子网页,遵循CC BY-SA 4.0协议,如果您需要转载,请注明本站网址或者原文地址。任何问题请咨询:yoyou2525@163.com.