ValueError: Unknown label type: 'continuous'

Question

I am trying to carry out a grid search to optimise my parameters. My code is:

from sklearn.grid_search import GridSearchCV
from sklearn.svm import SVC



parameters = [{'kernel':['rbf'], 'gamma' :[1e-2, 1e-3, 1e-4 ,1e-5], 
          'C': [1, 10, 100, 1000]},
          {'kernel': ['poly'], 'C': [1, 10, 100, 1000], 'degree':[1,2,3,4]}]  

clf = GridSearchCV (SVC(C=1), parameters, cv=5, scoring='f1_macro')
clf.fit(X_train, y_train)

My X_train, y_train are floating point numbers which are:

x_train = [[3.30049159],[2.25226244],[1.44078451] ...,[5.63927925],[5.431458],[4.35674369]]

y_train = [[0.2681013],[0.03454225],[0.02062136]...,[0.21827915],[0.28768273],[0.27969417]]

I believe the error may be that I am using floating point numbers, and that perhaps only integers can be passed into the classifier. If this is the case, how would this be resolved? My full traceback error message is:

ValueError                                Traceback (most recent call last)
<ipython-input-51-fb016a0a90cc> in <module>()
     11 
     12 clf = GridSearchCV (SVC(C=1), parameters, cv=5, scoring='f1_macro')
---> 13 clf.fit(X_train, y_train)

~/anaconda3_501/lib/python3.6/site-packages/sklearn/grid_search.py in fit(self, X, y)
    836 
    837         """
--> 838         return self._fit(X, y, ParameterGrid(self.param_grid))
    839 
    840 

~/anaconda3_501/lib/python3.6/site-packages/sklearn/grid_search.py in _fit(self, X, y, parameter_iterable)
    572                                     self.fit_params, return_parameters=True,
    573                                     error_score=self.error_score)
--> 574                 for parameters in parameter_iterable
    575                 for train, test in cv)
    576 

~/anaconda3_501/lib/python3.6/site-packages/sklearn/externals/joblib/parallel.py in __call__(self, iterable)
    777             # was dispatched. In particular this covers the edge
    778             # case of Parallel used with an exhausted iterator.
--> 779             while self.dispatch_one_batch(iterator):
    780                 self._iterating = True
    781             else:

~/anaconda3_501/lib/python3.6/site-packages/sklearn/externals/joblib/parallel.py in dispatch_one_batch(self, iterator)
    623                 return False
    624             else:
--> 625                 self._dispatch(tasks)
    626                 return True
    627 

~/anaconda3_501/lib/python3.6/site-packages/sklearn/externals/joblib/parallel.py in _dispatch(self, batch)
    586         dispatch_timestamp = time.time()
    587         cb = BatchCompletionCallBack(dispatch_timestamp, len(batch), self)
--> 588         job = self._backend.apply_async(batch, callback=cb)
    589         self._jobs.append(job)
    590 


        109     def apply_async(self, func, callback=None):
        110         """Schedule a func to be run"""
    --> 111         result = ImmediateResult(func)
        112         if callback:
        113             callback(result)

    ~/anaconda3_501/lib/python3.6/site-packages/sklearn/externals/joblib/_parallel_backends.py in __init__(self, batch)
        330         # Don't delay the application, to avoid keeping the input
        331         # arguments in memory
    --> 332         self.results = batch()
        333 
        334     def get(self):

    ~/anaconda3_501/lib/python3.6/site-packages/sklearn/externals/joblib/parallel.py in __call__(self)
        129 
        130     def __call__(self):
    --> 131         return [func(*args, **kwargs) for func, args, kwargs in self.items]
        132 
        133     def __len__(self):

~/anaconda3_501/lib/python3.6/site-packages/sklearn/externals/joblib/parallel.py in <listcomp>(.0)
    129 
    130     def __call__(self):
--> 131         return [func(*args, **kwargs) for func, args, kwargs in self.items]
    132 
    133     def __len__(self):

~/anaconda3_501/lib/python3.6/site-packages/sklearn/cross_validation.py in _fit_and_score(estimator, X, y, scorer, train, test, verbose, parameters, fit_params, return_train_score, return_parameters, error_score)
   1673             estimator.fit(X_train, **fit_params)
   1674         else:
-> 1675             estimator.fit(X_train, y_train, **fit_params)
   1676 
   1677     except Exception as e:

~/anaconda3_501/lib/python3.6/site-packages/sklearn/svm/base.py in fit(self, X, y, sample_weight)
    148 
    149         X, y = check_X_y(X, y, dtype=np.float64, order='C', accept_sparse='csr')
--> 150         y = self._validate_targets(y)
    151 
    152         sample_weight = np.asarray([]

~/anaconda3_501/lib/python3.6/site-packages/sklearn/svm/base.py in _validate_targets(self, y)
    498     def _validate_targets(self, y):
    499         y_ = column_or_1d(y, warn=True)
--> 500         check_classification_targets(y)
    501         cls, y = np.unique(y_, return_inverse=True)
    502         self.class_weight_ = compute_class_weight(self.class_weight, cls, y_)

~/anaconda3_501/lib/python3.6/site-packages/sklearn/utils/multiclass.py in check_classification_targets(y)
    170     if y_type not in ['binary', 'multiclass', 'multiclass-multioutput',
    171                       'multilabel-indicator', 'multilabel-sequences']:
--> 172         raise ValueError("Unknown label type: %r" % y_type)
    173 
    174 

ValueError: Unknown label type: 'continuous'

Help with this would be appreciated.

Answer 1

You are using a classifier, and a classifier can only predict binary or categorical targets. If you want to use a support vector machine to predict numeric values, you should use support vector regression (SVR) instead. Otherwise you will have to bin your y-values into discrete groups.

Answer 2

This is a regression problem, not a classification problem. The model is trying to fit X into classes defined by y, but your y values are continuous, which the SVC classifier cannot handle. Update your code to use SVR:

from sklearn.svm import SVR
from sklearn.grid_search import GridSearchCV

X_train = [[3.30049159], [2.25226244], [1.44078451]]

#1. Y should be 1d array of dimensions (n_samples,)
y_train = [0.2681013, 0.03454225, 0.02062136] 

#Grid Search
parameters = [{'kernel': ['rbf'], 'gamma': [1e-2, 1e-3, 1e-4, 1e-5],
               'C': [1, 10, 100, 1000]},
              {'kernel': ['poly'], 'C': [1, 10, 100, 1000], 'degree': [1, 2, 3, 4]}]

#2. Type of regressor 
reg = SVR(C=1)

#3. Regression cannot be evaluated with f1_macro, so scoring is changed to negative mean squared error
clf = GridSearchCV(reg, parameters, cv=5, scoring='neg_mean_squared_error')
clf.fit(X_train, y_train)

ValueError: Unknown label type: 'continuous'

Question

2 answers

solution1
2 2018-07-02 13:54:03

solution2
2 ACCEPTED 2018-07-02 14:02:18

ValueError: Unknown label type: 'continuous'

Question

2 answers

solution1 2 2018-07-02 13:54:03

solution2 2 ACCEPTED 2018-07-02 14:02:18

solution1
2 2018-07-02 13:54:03

solution2
2 ACCEPTED 2018-07-02 14:02:18