fit() 缺少 1 個必需的位置參數:'y'

[英]fit() missing 1 required positional argument: 'y'

# Separate the feature columns from the target column "CLASS".
X = df.drop(columns="CLASS")
y = df.CLASS

# Hold out 20% of the rows as a test set (random_state fixes the shuffle).
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
X_train.shape, X_test.shape, y_train.shape, y_test.shape

# Apply num_pipe() to the "PINJAM" column and cat_pipe(encoder='onehot')
# to the three categorical columns.
preprocessor = ColumnTransformer([
    ('numeric', num_pipe(), ["PINJAM"]),
    ('categoric', cat_pipe(encoder='onehot'), ["JENIS KELAMIN", "STATUS PERNIKAHAN", "JUMLAH TANGGUNGAN"]),
])

from sklearn.naive_bayes import GaussianNB
# NOTE: the 'algo' step is given the GaussianNB *class*, not an instance
# (GaussianNB()). This is what makes the fit() call below raise
# "TypeError: fit() missing 1 required positional argument: 'y'".
pipeline = Pipeline([
    ('prep', preprocessor),
    ('algo', GaussianNB)
])

pipeline.fit(X_train, y_train)


TypeError                                 Traceback (most recent call last)
Input In [46], in <cell line: 1>()
----> 1 pipeline.fit(X_train, y_train)

File ~\anaconda3\envs\jcopml\lib\site-packages\sklearn\pipeline.py:394, in Pipeline.fit(self, X, y, **fit_params)
    392     if self._final_estimator != "passthrough":
    393         fit_params_last_step = fit_params_steps[self.steps[-1][0]]
--> 394         self._final_estimator.fit(Xt, y, **fit_params_last_step)
    396 return self

TypeError: fit() missing 1 required positional argument: 'y'


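在問題中提供一個完整的示例總是更好的,而且這個示例應該盡量精簡。正如 @anastasiya-Romanova 所指出的,您必須以正確的方式初始化管道:每個步驟都必須傳入估計器的實例(`GaussianNB()`),而不是類別本身(`GaussianNB`)。如果傳入的是類別,管道呼叫 `fit(Xt, y)` 時 `Xt` 會被當成 `self`、`y` 會被當成 `X`,於是出現「缺少必需的位置參數 'y'」的錯誤。下面的示例展示了正確的寫法。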

from sklearn.datasets import make_blobs
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.pipeline import Pipeline
from sklearn.naive_bayes import GaussianNB
from sklearn.model_selection import train_test_split
import pandas as pd

# Generate synthetic data + make a pseudo-categorical column with qcut
X, y = make_blobs(n_samples=1000, centers=2, random_state=42)
X = pd.DataFrame(X)
X.columns = ["feat1", "feat2"]
# Bin feat2 into 3 quantile buckets; labels=False yields integer bin codes.
X["feat2"] = pd.qcut(X["feat2"], 3, labels=False, duplicates="drop")

# Split the data into train and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Create the pipeline. Every step is an estimator *instance* (note the
# parentheses after StandardScaler, OneHotEncoder and GaussianNB); passing
# the bare class instead is what triggers "fit() missing 1 required
# positional argument: 'y'".
pipeline = Pipeline([
    ('preprocessor', ColumnTransformer([
        ('scaler', StandardScaler(), ["feat1"]),
        ('onehot', OneHotEncoder(handle_unknown='ignore'), ['feat2']),
    ])),
    ('classifier', GaussianNB()),
])

# Fit the pipeline to the training data
pipeline.fit(X_train, y_train)
# Evaluate the model on the test data
accuracy = pipeline.score(X_test, y_test)
print('Test accuracy:', accuracy)

# show what the preprocessor is doing
X_transformed = pd.DataFrame(pipeline.named_steps['preprocessor'].transform(X))


          0    1    2    3
0 -0.757494  0.0  1.0  0.0
1  1.396373  1.0  0.0  0.0
2  0.648693  1.0  0.0  0.0
3  1.085098  1.0  0.0  0.0
4  0.895531  0.0  1.0  0.0
Test accuracy: 1.0

為了完整起見,sklearn 的鏈接文檔演示了如何以這種方式使用管道:

>>> from sklearn.svm import SVC
>>> from sklearn.preprocessing import StandardScaler
>>> from sklearn.datasets import make_classification
>>> from sklearn.model_selection import train_test_split
>>> from sklearn.pipeline import Pipeline
>>> X, y = make_classification(random_state=0)
>>> X_train, X_test, y_train, y_test = train_test_split(X, y,
...                                                     random_state=0)
>>> pipe = Pipeline([('scaler', StandardScaler()), ('svc', SVC())])
>>> # The pipeline can be used as any other estimator
>>> # and avoids leaking the test set into the train set
>>> pipe.fit(X_train, y_train)
Pipeline(steps=[('scaler', StandardScaler()), ('svc', SVC())])
>>> pipe.score(X_test, y_test)
0.88


