[英]TypeError: 'LocalFileOpener' object is not iterable
我有一個包含數百萬條記錄的龐大數據集(這是一個普通的 .csv 文件,用 pandas 加載時不會報錯)。但 pandas 加載這個數據集(.csv)時非常吃力,所以我決定改用 modin,據說只需改動一行代碼就能利用多個進程來加速。當我運行:
train_df = pd.read_csv("train_data.csv")
我收到以下錯誤:
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
<ipython-input-16-03696c0c8326> in <module>
----> 1 train_df = pd.read_csv("train_data.csv")
2 train_df
~\anaconda3\lib\site-packages\modin\logging\logger_function.py in run_and_log(*args, **kwargs)
63 """
64 if LogMode.get() == "disable":
---> 65 return f(*args, **kwargs)
66
67 logger = get_logger()
~\anaconda3\lib\site-packages\modin\pandas\io.py in read_csv(filepath_or_buffer, sep, delimiter, header, names, index_col, usecols, squeeze, prefix, mangle_dupe_cols, dtype, engine, converters, true_values, false_values, skipinitialspace, skiprows, nrows, na_values, keep_default_na, na_filter, verbose, skip_blank_lines, parse_dates, infer_datetime_format, keep_date_col, date_parser, dayfirst, cache_dates, iterator, chunksize, compression, thousands, decimal, lineterminator, quotechar, quoting, escapechar, comment, encoding, encoding_errors, dialect, error_bad_lines, warn_bad_lines, on_bad_lines, skipfooter, doublequote, delim_whitespace, low_memory, memory_map, float_precision, storage_options)
138 _, _, _, f_locals = inspect.getargvalues(inspect.currentframe())
139 kwargs = {k: v for k, v in f_locals.items() if k in _pd_read_csv_signature}
--> 140 return _read(**kwargs)
141
142
~\anaconda3\lib\site-packages\modin\pandas\io.py in _read(**kwargs)
59
60 squeeze = kwargs.pop("squeeze", False)
---> 61 pd_obj = FactoryDispatcher.read_csv(**kwargs)
62 # This happens when `read_csv` returns a TextFileReader object for iterating through
63 if isinstance(pd_obj, pandas.io.parsers.TextFileReader):
~\anaconda3\lib\site-packages\modin\core\execution\dispatching\factories\dispatcher.py in read_csv(cls, **kwargs)
183 @_inherit_docstrings(factories.BaseFactory._read_csv)
184 def read_csv(cls, **kwargs):
--> 185 return cls.__factory._read_csv(**kwargs)
186
187 @classmethod
~\anaconda3\lib\site-packages\modin\core\execution\dispatching\factories\factories.py in _read_csv(cls, **kwargs)
215 )
216 def _read_csv(cls, **kwargs):
--> 217 return cls.io_cls.read_csv(**kwargs)
218
219 @classmethod
~\anaconda3\lib\site-packages\modin\logging\logger_function.py in run_and_log(*args, **kwargs)
63 """
64 if LogMode.get() == "disable":
---> 65 return f(*args, **kwargs)
66
67 logger = get_logger()
~\anaconda3\lib\site-packages\modin\core\io\file_dispatcher.py in read(cls, *args, **kwargs)
151 postprocessing work on the resulting query_compiler object.
152 """
--> 153 query_compiler = cls._read(*args, **kwargs)
154 # TODO (devin-petersohn): Make this section more general for non-pandas kernel
155 # implementations.
~\anaconda3\lib\site-packages\modin\logging\logger_function.py in run_and_log(*args, **kwargs)
63 """
64 if LogMode.get() == "disable":
---> 65 return f(*args, **kwargs)
66
67 logger = get_logger()
~\anaconda3\lib\site-packages\modin\core\io\text\text_file_dispatcher.py in _read(cls, filepath_or_buffer, **kwargs)
1053 )
1054 f.seek(old_pos)
-> 1055 splits = cls.partitioned_file(
1056 f,
1057 num_partitions=NPartitions.get(),
~\anaconda3\lib\site-packages\modin\logging\logger_function.py in run_and_log(*args, **kwargs)
63 """
64 if LogMode.get() == "disable":
---> 65 return f(*args, **kwargs)
66
67 logger = get_logger()
~\anaconda3\lib\site-packages\modin\core\io\text\text_file_dispatcher.py in partitioned_file(cls, f, num_partitions, nrows, skiprows, quotechar, is_quoting, encoding, newline, header_size, pre_reading)
270 file_size = cls.file_size(f)
271
--> 272 rows_skipper(header_size)
273
274 if pre_reading:
~\anaconda3\lib\site-packages\modin\core\io\text\text_file_dispatcher.py in skipper(n)
496 return 0
497 else:
--> 498 return cls._read_rows(
499 f,
500 quotechar=quotechar,
~\anaconda3\lib\site-packages\modin\logging\logger_function.py in run_and_log(*args, **kwargs)
63 """
64 if LogMode.get() == "disable":
---> 65 return f(*args, **kwargs)
66
67 logger = get_logger()
~\anaconda3\lib\site-packages\modin\core\io\text\text_file_dispatcher.py in _read_rows(cls, f, nrows, quotechar, is_quoting, outside_quotes, encoding, newline)
392 iterator = f
393
--> 394 for line in iterator:
395 if is_quoting and line.count(quotechar) % 2:
396 outside_quotes = not outside_quotes
TypeError: 'LocalFileOpener' object is not iterable
這是完整的代碼:
import modin.pandas as pd
train_df = pd.read_csv("train_data.csv")
根據我的調查,這似乎與 fsspec 的版本有關。
運行此代碼時出現相同的錯誤:
import fsspec
file_path = r"./train_data.csv"
file = fsspec.open(file_path).open()
for line in file:
print(line)
錯誤:
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
<ipython-input-15-3fc27fe187bb> in <module>
4 file = fsspec.open(file_path).open()
5
----> 6 for line in file:
7 print(line)
TypeError: 'LocalFileOpener' object is not iterable
一些版本信息:
fsspec version: 0.7.4
modin version: 0.15.2
Windows version: 11
Python version: 3.8.3
有人可以告訴我這裡的問題是什麼,以及我該如何解決嗎?
提前致謝:)
以下環境在我的示例 csv 數據集上可以正常運行:
channels:
- conda-forge
dependencies:
- python=3.9
- fsspec=2022.7.1
- modin=0.15.2
您安裝的 fsspec 版本(0.7.4)相當舊,請升級到較新的版本(例如上面環境中的 2022.7.1)。
聲明:本站的技術帖子網頁,遵循CC BY-SA 4.0協議,如果您需要轉載,請注明本站網址或者原文地址。任何問題請咨詢:yoyou2525@163.com.