liblinear svm无法收敛后出现内存错误

[英]memory error after liblinear svm fails to converge

After a warning about liblinear failing to converge is printed, the following error trace is produced. I'm trying to understand what the error refers to and how to guard against it.

The SVM is from scikit-learn; here is the code that sets it up.

svc = LinearSVC(class_weight='balanced',verbose=1,max_iter=2000)
train_sizes, train_scores, valid_scores = learning_curve(svc,xtscale,np.ravel(ytran), 

error trace

    .C:\Python27\lib\site-packages\sklearn\svm\base.py:924: ConvergenceWarning: Liblinear failed to conv
    erge, increase the number of iterations.
      "the number of iterations.", ConvergenceWarning)
    ............[CV] ................................ no parameters to be set -13.2min
    .........Traceback (most recent call last):
      File "C:\MachineLearning\SFCrime\crime.py", line 59, in <module>
      File "C:\Python27\lib\site-packages\sklearn\learning_curve.py", line 153, in learning_curve
        for train, test in cv for n_train_samples in train_sizes_abs)
      File "C:\Python27\lib\site-packages\sklearn\externals\joblib\parallel.py", line 812, in __call__
      File "C:\Python27\lib\site-packages\sklearn\externals\joblib\parallel.py", line 762, in retrieve
        raise exception
    sklearn.externals.joblib.my_exceptions.JoblibMemoryError: JoblibMemoryError
    Multiprocessing exception:
    C:\MachineLearning\SFCrime\crime.py in <module>()
         54         # param_grid = {'C': [0.5, 1, 10]}
         55  #      gs = grid_search.GridSearchCV(svc, param_grid,n_jobs=4,verbose=1)
         56  #      gs.fit(testData[:,0:-2],np.ravel(testData[:,-1]))
         57         #print gs.best_estimator_
         58         train_sizes, train_scores, valid_scores = learning_curve(svc,xtscale,np.ravel(ytran)
    ---> 59                 train_sizes=[10000,20000,30000],scoring=make_scorer(accuracy_score),n_jobs=4
         60         #svc.fit(testData[:,0:7],np.ravel(testData[:,7]))
         61         #valData = xydecider[np.random.randint(0,xydecider.shape[0],10000)]
         62         #print svc.predict(xtransf.transform(np.matrix([2015,7,14,8,35, -122.3935620,37.7782

    C:\Python27\lib\site-packages\sklearn\learning_curve.py in learning_curve(estimator=LinearSVC(C=1.0,
     class_weight='balanced', dual=T...', random_state=None, tol=0.0001,
         verbose=1), X=array([[  0.00000000e+00,   1.73165030e+00,  -4....603501e-02,   3.21082024e-02,
     -1.99147226e-02]]), y=array([ 1,  2,  2, ...,  3,  5, 13]), train_sizes=[10000, 20000, 30000], cv=[
    (array([107733, 192190, 212425, ..., 878046, 878047, 878048]), array([     0,      1,      2, ..., 4
    65259, 466404, 486091])), (array([     0,      1,      2, ..., 878046, 878047, 878048]), array([1077
    33, 192190, 212425, ..., 718561, 718572, 718573])), (array([     0,      1,      2, ..., 718561, 718
    572, 718573]), array([316490, 337880, 481804, ..., 878046, 878047, 878048]))], scoring=make_scorer(a
    ccuracy_score), exploit_incremental_learning=False, n_jobs=4, pre_dispatch='all', verbose=2)
        148             scorer, verbose) for train, test in cv)
        149     else:
        150         out = parallel(delayed(_fit_and_score)(
        151             clone(estimator), X, y, scorer, train[:n_train_samples], test,
        152             verbose, parameters=None, fit_params=None, return_train_score=True)
    --> 153             for train, test in cv for n_train_samples in train_sizes_abs)
            cv = [(array([107733, 192190, 212425, ..., 878046, 878047, 878048]), array([     0,      1,
         2, ..., 465259, 466404, 486091])), (array([     0,      1,      2, ..., 878046, 878047, 878048]
    ), array([107733, 192190, 212425, ..., 718561, 718572, 718573])), (array([     0,      1,      2, ..
    ., 718561, 718572, 718573]), array([316490, 337880, 481804, ..., 878046, 878047, 878048]))]
        154         out = np.array(out)[:, :2]
        155         n_cv_folds = out.shape[0] // n_unique_ticks
        156         out = out.reshape(n_cv_folds, n_unique_ticks, 2)

    C:\Python27\lib\site-packages\sklearn\externals\joblib\parallel.py in __call__(self=Parallel(n_jobs=
    4), iterable=<generator object <genexpr>>)
        807             if pre_dispatch == "all" or n_jobs == 1:
        808                 # The iterable was consumed all at once by the above for loop.
        809                 # No need to wait for async callbacks to trigger to
        810                 # consumption.
        811                 self._iterating = False
    --> 812             self.retrieve()
            self.retrieve = <bound method Parallel.retrieve of Parallel(n_jobs=4)>
        813             # Make sure that we get a last message telling us we are done
        814             elapsed_time = time.time() - self._start_time
        815             self._print('Done %3i out of %3i | elapsed: %s finished',
        816                         (len(self._output), len(self._output),

    Sub-process traceback:
    MemoryError                                        Wed Jan 13 12:56:08 2016
    PID: 5784                             Python 2.7.10: C:\Python27\python.exe
    C:\Python27\lib\site-packages\sklearn\externals\joblib\parallel.pyc in __call__(self=<sklearn.extern
    als.joblib.parallel.BatchedCalls object>)
         67     def __init__(self, iterator_slice):
         68         self.items = list(iterator_slice)
         69         self._size = len(self.items)
         71     def __call__(self):
    ---> 72         return [func(*args, **kwargs) for func, args, kwargs in self.items]
         74     def __len__(self):
         75         return self._size

    C:\Python27\lib\site-packages\sklearn\cross_validation.pyc in _fit_and_score(estimator=LinearSVC(C=1
    .0, class_weight='balanced', dual=T...', random_state=None, tol=0.0001,
         verbose=1), X=memmap([[  0.00000000e+00,   1.73165030e+00,  -4...603501e-02,   3.21082024e-02,
     -1.99147226e-02]]), y=memmap([ 1,  2,  2, ...,  3,  5, 13]), scorer=make_scorer(accuracy_score), tr
    ain=array([107733, 192190, 212425, ..., 309456, 309457, 309460]), test=memmap([     0,      1,
    2, ..., 465259, 466404, 486091]), verbose=2, parameters=None, fit_params={}, return_train_score=True
    , return_parameters=False, error_score='raise')
       1519     if parameters is not None:
       1520         estimator.set_params(**parameters)
       1522     start_time = time.time()
    -> 1524     X_train, y_train = _safe_split(estimator, X, y, train)
       1525     X_test, y_test = _safe_split(estimator, X, y, test, train)
       1527     try:
       1528         if y_train is None:

    C:\Python27\lib\site-packages\sklearn\cross_validation.pyc in _safe_split(estimator=LinearSVC(C=1.0,
     class_weight='balanced', dual=T...', random_state=None, tol=0.0001,
         verbose=1), X=memmap([[  0.00000000e+00,   1.73165030e+00,  -4...603501e-02,   3.21082024e-02,
     -1.99147226e-02]]), y=memmap([ 1,  2,  2, ...,  3,  5, 13]), indices=array([107733, 192190, 212425,
     ..., 309456, 309457, 309460]), train_indices=None)
       1586             if train_indices is None:
       1587                 X_subset = X[np.ix_(indices, indices)]
       1588             else:
       1589                 X_subset = X[np.ix_(indices, train_indices)]
       1590         else:
    -> 1591             X_subset = safe_indexing(X, indices)
       1593     if y is not None:
       1594         y_subset = safe_indexing(y, indices)
       1595     else:

    C:\Python27\lib\site-packages\sklearn\utils\__init__.pyc in safe_indexing(X=memmap([[  0.00000000e+0
    0,   1.73165030e+00,  -4...603501e-02,   3.21082024e-02,  -1.99147226e-02]]), indices=array([107733,
     192190, 212425, ..., 309456, 309457, 309460]))
        158             return X.copy().iloc[indices]
        159     elif hasattr(X, "shape"):
        160         if hasattr(X, 'take') and (hasattr(indices, 'dtype') and
        161                                    indices.dtype.kind == 'i'):
        162             # This is often substantially faster than X[indices]
    --> 163             return X.take(indices, axis=0)
        164         else:
        165             return X[indices]
        166     else:
        167         return [X[idx] for idx in indices]



Try increasing the iteration limit (max_iter). It could be that the SVM needs more iterations to converge than you have allowed. Also, if liblinear can't converge on your data, I would try sklearn.svm.SVC, which uses a different algorithm.

