[英]Getting Nan, Infinite error in sklearn
我正在用sklearn运行几个机器学习(ML)算法。 但是所有这些算法都会报以下错误:
/Users//anaconda/lib/python2.7/site-packages/sklearn/utils/validation.pyc in check_X_y(X, y, accept_sparse, dtype, order, copy, force_all_finite, ensure_2d, allow_nd, multi_output, ensure_min_samples, ensure_min_features, y_numeric)
448 else:
449 y = column_or_1d(y, warn=True)
--> 450 _assert_all_finite(y)
451 if y_numeric and y.dtype.kind == 'O':
452 y = y.astype(np.float64)
/Users//anaconda/lib/python2.7/site-packages/sklearn/utils/validation.pyc in _assert_all_finite(X)
50 and not np.isfinite(X).all()):
51 raise ValueError("Input contains NaN, infinity"
---> 52 " or a value too large for %r." % X.dtype)
53
54
ValueError: Input contains NaN, infinity or a value too large for dtype('float64')
请注意,我的设计矩阵中没有NaN或无穷大值。 这是我所做的检查:
np.isfinite(X_cohort_pr).all()
Out[259]:
True
X.isnull().any().any()
Out[261]:
False
因此,如您所见,我的数据矩阵中没有空值或无穷大值。 那为什么我会收到此错误,又该如何解决呢? 这已经花费了我8个多小时的调试时间。请帮帮我。
EDIT2:
这是数据矩阵的前五行。 它总共有80万行和大约180多个特征。
array([[ 1. , 0. , 0. , 0. , 0. , 0. , 0. ,
0. , 1. , 0. , 0. , 0. , 0. , 0. ,
0. , 0. , 0. , 0. , 0. , 0. , 1. ,
0. , 0. , 0. , 0. , 0. , 0. , 0. ,
0. , 0. , 1. , 0. , 0. , 0. , 0. ,
0. , 1. , 0. , 0. , 0. , 0. , 1. ,
0. , 0. , -0.2637, 1. , 0. , 1. , 0. ,
0. , 1. , 0. , 1. , 0. , 0. , 1. ,
0. , 0. , 0. , 0. , 0. , 0. , 0. ,
0. , 0. , 0. , 0. , 0. , 1. , 0. ,
0. , 0. , 0. , 0. , 0. , 0. , 0. ,
0. , 0. , 0. , 0. , 0. , 0. , 0. ,
0. , 0. , 0. , 0. , 0. , 0. , 0. ,
0. , 0. , 0. , 0. , 0. , 0. , 0. ,
0. , 0. , 0. , 0. , 0. , 0. , 0. ,
0. , 0. , 0. , 0. , 0. , 0. , 0. ,
0. , 0. , 0. , 0. , 0. , 0. , 0. ,
0. , 0. , 0. , 0. , 0. , 0. , 0. ,
0. , 0. , 0. , 0. , 0. , 0. , 0. ,
0. , 0. , 0. , 0. , 0. , 0. , 0. ,
0. , 0. , 0. , 0. , 0. , 0. , 0. ,
0. , 0. , 0. , -0.012 , -0.012 , 0. , -0.0028,
-0.0108, 0. , -0.0111, -0.0135, 0. , 0. , 0. ,
0. , 0. , 0. , 0. , 0. , 0. , 0. ,
0. , 0. , 0. , 0. , 0. , 0. , 0. ,
0. , 0. , 0. , 0. , 0. , 0. , 0. ,
0. , 0. , 0. , 0. , 0. , 0. , 0. ,
0. , 0. , 0. , 0. , 0. , 0. , 0. ,
0. , 0. , 0. , 0. , 0. , 0. , -0.0034,
-0.0027, -0.0725, -0.0673, -0.0625, -0.0582, -0.0065, -0.057 ,
-0.0809, -0.355 ],
[ 1. , 0. , 0. , 0. , 0. , 0. , 0. ,
0. , 1. , 0. , 0. , 0. , 0. , 0. ,
0. , 0. , 0. , 0. , 0. , 0. , 1. ,
0. , 0. , 0. , 0. , 0. , 0. , 0. ,
0. , 0. , 0. , 0. , 1. , 0. , 0. ,
0. , 0. , 1. , 0. , 0. , 0. , 0. ,
0. , 0. , 0.2413, 1. , 0. , 1. , 0. ,
0. , 1. , 0. , 0. , 1. , 0. , 0. ,
0. , 0. , 0. , 0. , 0. , 0. , 0. ,
0. , 0. , 0. , 0. , 1. , 0. , 0. ,
0. , 0. , 0. , 0. , 0. , 0. , 0. ,
0. , 0. , 0. , 0. , 0. , 0. , 0. ,
0. , 0. , 0. , 0. , 0. , 0. , 0. ,
0. , 0. , 0. , 0. , 0. , 0. , 0. ,
0. , 0. , 0. , 0. , 0. , 0. , 0. ,
0. , 0. , 0. , 0. , 0. , 0. , 0. ,
0. , 0. , 0. , 0. , 0. , 0. , 0. ,
0. , 0. , 0. , 0. , 0. , 0. , 0. ,
0. , 0. , 0. , 0. , 0. , 0. , 0. ,
0. , 0. , 0. , 0. , 0. , 0. , 0. ,
0. , 0. , 0. , 0. , 0. , 0. , 0. ,
0. , 0. , 0. , -0.012 , -0.012 , 0. , -0.0028,
-0.0108, 0. , -0.0111, -0.0135, 0. , 0. , 0. ,
0. , 0. , 0. , 0. , 0. , 0. , 0. ,
0. , 0. , 0. , 0. , 0. , 0. , 0. ,
0. , 0. , 0. , 0. , 0. , 0. , 0. ,
0. , 0. , 0. , 0. , 0. , 1. , 0. ,
0. , 0. , 0. , 0. , 0. , 0. , 0. ,
0. , 0. , 0. , 0. , 0. , 0. , -0.0034,
-0.0027, -0.0718, -0.0673, -0.0625, -0.0582, -0.0065, -0.057 ,
-0.0809, 0.1579],
[ 1. , 0. , 0. , 0. , 0. , 0. , 0. ,
0. , 1. , 0. , 0. , 0. , 0. , 0. ,
0. , 0. , 0. , 0. , 0. , 0. , 1. ,
0. , 0. , 0. , 0. , 0. , 0. , 0. ,
0. , 0. , 1. , 0. , 0. , 0. , 0. ,
0. , 0. , 0. , 0. , 0. , 0. , 1. ,
0. , 0. , 0.1688, 1. , 0. , 1. , 0. ,
0. , 1. , 0. , 0. , 1. , 0. , 0. ,
0. , 0. , 0. , 1. , 0. , 0. , 0. ,
0. , 0. , 0. , 0. , 1. , 0. , 0. ,
0. , 0. , 0. , 0. , 0. , 0. , 0. ,
0. , 0. , 0. , 0. , 0. , 0. , 0. ,
0. , 0. , 0. , 0. , 0. , 0. , 0. ,
0. , 0. , 0. , 0. , 0. , 0. , 0. ,
0. , 0. , 0. , 0. , 0. , 0. , 0. ,
0. , 0. , 0. , 0. , 0. , 0. , 0. ,
0. , 0. , 0. , 0. , 0. , 0. , 0. ,
0. , 0. , 0. , 0. , 0. , 0. , 0. ,
0. , 0. , 0. , 0. , 0. , 0. , 0. ,
0. , 0. , 0. , 0. , 0. , 0. , 0. ,
0. , 0. , 0. , 0. , 0. , 0. , 0. ,
0. , 0. , 0. , -0.012 , -0.012 , 0. , -0.0028,
-0.0108, 0. , -0.0111, -0.0135, 0. , 0. , 0. ,
0. , 0. , 0. , 0. , 0. , 0. , 0. ,
0. , 0. , 0. , 0. , 0. , 0. , 0. ,
0. , 1. , 0. , 0. , 0. , 0. , 0. ,
0. , 0. , 0. , 0. , 0. , 0. , 0. ,
0. , 0. , 0. , 0. , 0. , 0. , 0. ,
0. , 0. , 0. , 0. , 0. , 0. , -0.0034,
-0.0027, -0.0725, -0.0673, -0.0625, -0.0582, -0.0065, -0.057 ,
-0.0809, 0.1642],
[ 1. , 0. , 0. , 0. , 0. , 0. , 0. ,
0. , 1. , 0. , 0. , 0. , 0. , 0. ,
0. , 0. , 0. , 0. , 0. , 0. , 0. ,
1. , 0. , 0. , 0. , 0. , 0. , 0. ,
0. , 0. , 0. , 0. , 0. , 1. , 0. ,
0. , 1. , 0. , 0. , 0. , 0. , 0. ,
0. , 0. , 0.2462, 1. , 0. , 1. , 0. ,
0. , 1. , 0. , 0. , 1. , 1. , 0. ,
0. , 0. , 0. , 1. , 0. , 0. , 0. ,
0. , 0. , 0. , 0. , 1. , 0. , 0. ,
0. , 0. , 0. , 0. , 0. , 0. , 0. ,
0. , 0. , 0. , 0. , 0. , 0. , 0. ,
0. , 0. , 0. , 0. , 0. , 0. , 0. ,
0. , 0. , 0. , 0. , 0. , 0. , 0. ,
0. , 0. , 0. , 0. , 0. , 0. , 0. ,
0. , 0. , 0. , 0. , 0. , 0. , 0. ,
0. , 0. , 0. , 0. , 0. , 0. , 0. ,
0. , 0. , 0. , 0. , 0. , 0. , 0. ,
0. , 0. , 0. , 0. , 0. , 0. , 0. ,
0. , 0. , 0. , 0. , 0. , 0. , 0. ,
0. , 0. , 0. , 0. , 0. , 0. , 0. ,
0. , 0. , 0. , -0.012 , -0.012 , 0. , -0.0028,
-0.0108, 0. , -0.0111, -0.0135, 0. , 0. , 0. ,
0. , 0. , 0. , 0. , 0. , 0. , 0. ,
0. , 0. , 0. , 0. , 1. , 0. , 0. ,
0. , 0. , 0. , 0. , 0. , 0. , 0. ,
0. , 0. , 0. , 0. , 0. , 0. , 0. ,
0. , 0. , 1. , 0. , 0. , 1. , 0. ,
0. , 0. , 0. , 0. , 0. , 0. , 0.0004,
-0.0012, -0.069 , -0.0673, -0.0618, -0.0582, -0.0065, -0.057 ,
-0.0809, 0.1713],
[ 1. , 0. , 0. , 0. , 0. , 0. , 0. ,
0. , 1. , 0. , 0. , 0. , 0. , 0. ,
0. , 0. , 0. , 0. , 0. , 0. , 0. ,
1. , 0. , 0. , 0. , 0. , 0. , 0. ,
0. , 0. , 0. , 0. , 0. , 0. , 1. ,
1. , 0. , 0. , 0. , 0. , 0. , 0. ,
0. , 0. , 0.1273, 1. , 0. , 1. , 0. ,
0. , 1. , 0. , 0. , 1. , 0. , 0. ,
0. , 0. , 0. , 1. , 0. , 0. , 0. ,
0. , 0. , 0. , 0. , 1. , 0. , 0. ,
0. , 0. , 0. , 0. , 0. , 0. , 0. ,
0. , 0. , 0. , 0. , 0. , 0. , 0. ,
0. , 0. , 0. , 0. , 0. , 0. , 0. ,
0. , 0. , 0. , 0. , 0. , 0. , 0. ,
0. , 0. , 0. , 0. , 0. , 0. , 0. ,
0. , 0. , 0. , 0. , 0. , 0. , 0. ,
0. , 0. , 0. , 0. , 0. , 0. , 0. ,
0. , 0. , 0. , 0. , 0. , 0. , 0. ,
0. , 0. , 0. , 0. , 0. , 0. , 0. ,
0. , 0. , 0. , 0. , 0. , 0. , 0. ,
0. , 0. , 0. , 0. , 0. , 0. , 0. ,
0. , 0. , 0. , -0.012 , -0.012 , 0. , -0.0028,
-0.0108, 0. , -0.0111, -0.0135, 0. , 0. , 0. ,
0. , 0. , 0. , 0. , 0. , 0. , 0. ,
0. , 0. , 0. , 0. , 0. , 0. , 0. ,
0. , 0. , 0. , 0. , 0. , 0. , 0. ,
0. , 0. , 0. , 0. , 0. , 0. , 0. ,
0. , 0. , 1. , 0. , 0. , 1. , 0. ,
0. , 0. , 0. , 0. , 0. , 0. , 0.0037,
-0.0023, -0.0633, -0.0673, -0.0625, -0.0582, -0.0065, -0.057 ,
-0.0809, 0.1713]])
我还注意到一件事:当我运行SVM时,会出现同样的NaN、Inf错误,但它还会打印出一些值,如下所示。 同样,任何地方都没有NaN。 我已经彻底检查过了。 我仍然不知道为什么会抛出这些值。
_unique_labels = _FN_UNIQUE_LABELS.get(label_type, None)
105 if not _unique_labels:
--> 106 raise ValueError("Unknown label type: %r" % ys)
107
108 ys_labels = set(chain.from_iterable(_unique_labels(y) for y in ys))
ValueError: Unknown label type: 117456 0
117457 0
117458 0
117459 0
117460 0
117461 0
117462 0
117463 0
117464 0
117465 0
117466 2
117467 0
117468 0
117469 0
117470 NaN
117471 0
117472 NaN
117473 3
117474 0
117475 NaN
117476 0
117477 NaN
117478 6
117479 0
117480 0
117481 NaN
117482 NaN
117483 0
117484 NaN
您还没有检查其他情况: 强制转换为np.float64
如果您使用的是更高精度的数据类型,则向下转换可能会将很大的数字呈现为无穷大。 当您以原始格式检查数据时,它会很高兴地报告一个有限的数字。
我不能确定您的具体问题是什么,但下面是产生该错误的一种方法。 如果您需要更多帮助,请提供一个可重现的示例。 经过8小时的调试,我相信您手头有这样的示例 :)
a = np.array([1e100],dtype=np.float128)
# 1e100 still fits within 64 bits. It's not trivial to
# instantiate an array with that large a number without
# doing arithmetic at some point
a *= a
# make it EVEN BIGGER!!! (we're at 1e400 for those keeping track)
a *= a
np.isfinite(a) # prints True: Check
np.isfinite(a.astype(np.float64)) # prints... False. :(
要查看是否属于这种情况,只需检查处理过程中所使用的数据类型。 如果您是从文件中读取数据或进行了一些算术运算,则可能会出现如此巨大的数值。 64位浮点数(float64)能表示的最大值是有限的(约为1.8e308),但numpy中有许多数据类型可以容纳更大的值,就像python中标准的任意精度整数一样,一切皆有可能!
今后,一种调试方法是把ValueError之前的每个条件提取出来或注释掉,并立即打印数据。 这样您应该能够准确看到数据在哪里出了问题。 由于您使用的是外部软件包,这个过程会有些麻烦,但如果您已经浪费了很多时间,仍然可以获取源代码并以这种方式进行测试。
声明:本站的技术帖子网页,遵循CC BY-SA 4.0协议,如果您需要转载,请注明本站网址或者原文地址。任何问题请咨询:yoyou2525@163.com.