[英]Memory error after liblinear SVM fails to converge
After getting an error message pertaining to liblinear failing to converge, the following error trace is produced. 在收到与liblinear无法收敛有关的错误消息后,将产生以下错误跟踪。 I'm trying to understand what the error is referring to, and how to guard against it.
我试图理解该错误指的是什么,以及如何防范该错误。
The SVM is from scikit-learn; here is the code that sets it up. svm来自SKLearn,这是设置它的代码。
svc = LinearSVC(class_weight='balanced',verbose=1,max_iter=2000)
train_sizes, train_scores, valid_scores = learning_curve(svc,xtscale,np.ravel(ytran),
train_sizes=[10000,20000,30000],scoring=make_scorer(accuracy_score),n_jobs=4,verbose=2)
error trace 错误跟踪
.C:\Python27\lib\site-packages\sklearn\svm\base.py:924: ConvergenceWarning: Liblinear failed to conv
erge, increase the number of iterations.
"the number of iterations.", ConvergenceWarning)
............[CV] ................................ no parameters to be set -13.2min
.........Traceback (most recent call last):
File "C:\MachineLearning\SFCrime\crime.py", line 59, in <module>
train_sizes=[10000,20000,30000],scoring=make_scorer(accuracy_score),n_jobs=4,verbose=2)
File "C:\Python27\lib\site-packages\sklearn\learning_curve.py", line 153, in learning_curve
for train, test in cv for n_train_samples in train_sizes_abs)
File "C:\Python27\lib\site-packages\sklearn\externals\joblib\parallel.py", line 812, in __call__
self.retrieve()
File "C:\Python27\lib\site-packages\sklearn\externals\joblib\parallel.py", line 762, in retrieve
raise exception
sklearn.externals.joblib.my_exceptions.JoblibMemoryError: JoblibMemoryError
___________________________________________________________________________
Multiprocessing exception:
...........................................................................
C:\MachineLearning\SFCrime\crime.py in <module>()
54 # param_grid = {'C': [0.5, 1, 10]}
55 # gs = grid_search.GridSearchCV(svc, param_grid,n_jobs=4,verbose=1)
56 # gs.fit(testData[:,0:-2],np.ravel(testData[:,-1]))
57 #print gs.best_estimator_
58 train_sizes, train_scores, valid_scores = learning_curve(svc,xtscale,np.ravel(ytran)
,
---> 59 train_sizes=[10000,20000,30000],scoring=make_scorer(accuracy_score),n_jobs=4
,verbose=2)
60 #svc.fit(testData[:,0:7],np.ravel(testData[:,7]))
61 #valData = xydecider[np.random.randint(0,xydecider.shape[0],10000)]
62 #print svc.predict(xtransf.transform(np.matrix([2015,7,14,8,35, -122.3935620,37.7782
485])))
63
...........................................................................
C:\Python27\lib\site-packages\sklearn\learning_curve.py in learning_curve(estimator=LinearSVC(C=1.0,
class_weight='balanced', dual=T...', random_state=None, tol=0.0001,
verbose=1), X=array([[ 0.00000000e+00, 1.73165030e+00, -4....603501e-02, 3.21082024e-02,
-1.99147226e-02]]), y=array([ 1, 2, 2, ..., 3, 5, 13]), train_sizes=[10000, 20000, 30000], cv=[
(array([107733, 192190, 212425, ..., 878046, 878047, 878048]), array([ 0, 1, 2, ..., 4
65259, 466404, 486091])), (array([ 0, 1, 2, ..., 878046, 878047, 878048]), array([1077
33, 192190, 212425, ..., 718561, 718572, 718573])), (array([ 0, 1, 2, ..., 718561, 718
572, 718573]), array([316490, 337880, 481804, ..., 878046, 878047, 878048]))], scoring=make_scorer(a
ccuracy_score), exploit_incremental_learning=False, n_jobs=4, pre_dispatch='all', verbose=2)
148 scorer, verbose) for train, test in cv)
149 else:
150 out = parallel(delayed(_fit_and_score)(
151 clone(estimator), X, y, scorer, train[:n_train_samples], test,
152 verbose, parameters=None, fit_params=None, return_train_score=True)
--> 153 for train, test in cv for n_train_samples in train_sizes_abs)
cv = [(array([107733, 192190, 212425, ..., 878046, 878047, 878048]), array([ 0, 1,
2, ..., 465259, 466404, 486091])), (array([ 0, 1, 2, ..., 878046, 878047, 878048]
), array([107733, 192190, 212425, ..., 718561, 718572, 718573])), (array([ 0, 1, 2, ..
., 718561, 718572, 718573]), array([316490, 337880, 481804, ..., 878046, 878047, 878048]))]
154 out = np.array(out)[:, :2]
155 n_cv_folds = out.shape[0] // n_unique_ticks
156 out = out.reshape(n_cv_folds, n_unique_ticks, 2)
157
...........................................................................
C:\Python27\lib\site-packages\sklearn\externals\joblib\parallel.py in __call__(self=Parallel(n_jobs=
4), iterable=<generator object <genexpr>>)
807 if pre_dispatch == "all" or n_jobs == 1:
808 # The iterable was consumed all at once by the above for loop.
809 # No need to wait for async callbacks to trigger to
810 # consumption.
811 self._iterating = False
--> 812 self.retrieve()
self.retrieve = <bound method Parallel.retrieve of Parallel(n_jobs=4)>
813 # Make sure that we get a last message telling us we are done
814 elapsed_time = time.time() - self._start_time
815 self._print('Done %3i out of %3i | elapsed: %s finished',
816 (len(self._output), len(self._output),
---------------------------------------------------------------------------
Sub-process traceback:
---------------------------------------------------------------------------
MemoryError Wed Jan 13 12:56:08 2016
PID: 5784 Python 2.7.10: C:\Python27\python.exe
...........................................................................
C:\Python27\lib\site-packages\sklearn\externals\joblib\parallel.pyc in __call__(self=<sklearn.extern
als.joblib.parallel.BatchedCalls object>)
67 def __init__(self, iterator_slice):
68 self.items = list(iterator_slice)
69 self._size = len(self.items)
70
71 def __call__(self):
---> 72 return [func(*args, **kwargs) for func, args, kwargs in self.items]
73
74 def __len__(self):
75 return self._size
76
...........................................................................
C:\Python27\lib\site-packages\sklearn\cross_validation.pyc in _fit_and_score(estimator=LinearSVC(C=1
.0, class_weight='balanced', dual=T...', random_state=None, tol=0.0001,
verbose=1), X=memmap([[ 0.00000000e+00, 1.73165030e+00, -4...603501e-02, 3.21082024e-02,
-1.99147226e-02]]), y=memmap([ 1, 2, 2, ..., 3, 5, 13]), scorer=make_scorer(accuracy_score), tr
ain=array([107733, 192190, 212425, ..., 309456, 309457, 309460]), test=memmap([ 0, 1,
2, ..., 465259, 466404, 486091]), verbose=2, parameters=None, fit_params={}, return_train_score=True
, return_parameters=False, error_score='raise')
1519 if parameters is not None:
1520 estimator.set_params(**parameters)
1521
1522 start_time = time.time()
1523
-> 1524 X_train, y_train = _safe_split(estimator, X, y, train)
1525 X_test, y_test = _safe_split(estimator, X, y, test, train)
1526
1527 try:
1528 if y_train is None:
...........................................................................
C:\Python27\lib\site-packages\sklearn\cross_validation.pyc in _safe_split(estimator=LinearSVC(C=1.0,
class_weight='balanced', dual=T...', random_state=None, tol=0.0001,
verbose=1), X=memmap([[ 0.00000000e+00, 1.73165030e+00, -4...603501e-02, 3.21082024e-02,
-1.99147226e-02]]), y=memmap([ 1, 2, 2, ..., 3, 5, 13]), indices=array([107733, 192190, 212425,
..., 309456, 309457, 309460]), train_indices=None)
1586 if train_indices is None:
1587 X_subset = X[np.ix_(indices, indices)]
1588 else:
1589 X_subset = X[np.ix_(indices, train_indices)]
1590 else:
-> 1591 X_subset = safe_indexing(X, indices)
1592
1593 if y is not None:
1594 y_subset = safe_indexing(y, indices)
1595 else:
...........................................................................
C:\Python27\lib\site-packages\sklearn\utils\__init__.pyc in safe_indexing(X=memmap([[ 0.00000000e+0
0, 1.73165030e+00, -4...603501e-02, 3.21082024e-02, -1.99147226e-02]]), indices=array([107733,
192190, 212425, ..., 309456, 309457, 309460]))
158 return X.copy().iloc[indices]
159 elif hasattr(X, "shape"):
160 if hasattr(X, 'take') and (hasattr(indices, 'dtype') and
161 indices.dtype.kind == 'i'):
162 # This is often substantially faster than X[indices]
--> 163 return X.take(indices, axis=0)
164 else:
165 return X[indices]
166 else:
167 return [X[idx] for idx in indices]
MemoryError:
______________
_____________________________________________________________
Try increasing the maximum number of iterations (the max_iter parameter). 尝试增加迭代值。 It could be that the SVM needs more iterations to converge than you have allowed it.
SVM收敛可能需要比您允许的迭代更多的迭代。 Also, if liblinear can't converge on your data, I would try sklearn.svm.SVC, which uses a different algorithm.
另外,如果liblinear无法在您的数据上收敛,我将尝试使用sklearn.svm.SVC作为另一种算法。
声明:本站的技术帖子网页,遵循CC BY-SA 4.0协议,如果您需要转载,请注明本站网址或者原文地址。任何问题请咨询:yoyou2525@163.com.