繁体   English   中英

Python 多处理,打开的文件太多

[英]Python multiprocessing, too many open files

我目前正在尝试构建一个基于单个 dataframe 保存 10,000 - 50,000 个图像的 scipt

为了加快这个过程,我试图让多处理工作,使用下面的脚本。 在收到有关打开文件过多的错误之前,我获得了大约 4,000 张图像

# import multiprocessing
import multiprocessing as mp

# Create all image files and save to folder
week_list = df[['instrument_id', 'year', 'week_of_year']].drop_duplicates().to_numpy().tolist()

# define function that creates plot image
def create_image(instrument_id, year, week_of_year):
    plt.clf()
    # first slice the dataframe to only contain data from the first week
    weekDF = df[(df['instrument_id'] == instrument_id) & (df['year'] == year) & (df['week_of_year'] == week_of_year)]

    # plot day_pct
    fig, axes = plt.subplots(nrows=2, ncols=2, figsize=(2,2))

    weekDF.plot(
        ax=axes[0,0],
        x='previous_months_rows_id',
        y='day_pct',
        kind='kde'
    ),
    weekDF.plot(
        ax=axes[0,1],
        x='previous_months_rows_id',
        y='day_pct'
    ),
    weekDF.plot(
        ax=axes[1,0],
        x='previous_months_rows_id',
        y='day_volume',
        kind='kde'
    ),
    weekDF.plot(
        ax=axes[1,1],
        x='previous_months_rows_id',
        y='day_volume'
    )
    axes[0,0].axis("off")
    axes[1,0].axis("off")
    axes[0,1].axis("off")
    axes[1,1].axis("off")

    axes[0,0].get_legend().remove()
    axes[1,0].get_legend().remove()
    axes[0,1].get_legend().remove()
    axes[1,1].get_legend().remove()

    plt.tight_layout()
    plt.ioff()
    fig.savefig('/home/henrik/Dokumenter/Stock/img/{instrument_id}_{year}_{week_of_year}.jpg'.format(instrument_id = instrument_id, year =year, week_of_year = week_of_year))
    plt.close(fig)

processes = [mp.Process(target=create_image, args=(week[0], week[1], week[2])) for week in week_list]

# Run processes
for p in processes:
    p.start()

# Exit the completed processes
for p in processes:
    p.join()

我得到的错误:

OSError                                   Traceback (most recent call last)
<ipython-input-6-901de27246d0> in <module>
     56 # Run processes
     57 for p in processes:
---> 58     p.start()
     59 
     60 # Exit the completed processes

~/anaconda3/lib/python3.7/multiprocessing/process.py in start(self)
    110                'daemonic processes are not allowed to have children'
    111         _cleanup()
--> 112         self._popen = self._Popen(self)
    113         self._sentinel = self._popen.sentinel
    114         # Avoid a refcycle if the target function holds an indirect

~/anaconda3/lib/python3.7/multiprocessing/context.py in _Popen(process_obj)
    221     @staticmethod
    222     def _Popen(process_obj):
--> 223         return _default_context.get_context().Process._Popen(process_obj)
    224 
    225 class DefaultContext(BaseContext):

~/anaconda3/lib/python3.7/multiprocessing/context.py in _Popen(process_obj)
    275         def _Popen(process_obj):
    276             from .popen_fork import Popen
--> 277             return Popen(process_obj)
    278 
    279     class SpawnProcess(process.BaseProcess):

~/anaconda3/lib/python3.7/multiprocessing/popen_fork.py in __init__(self, process_obj)
     18         self.returncode = None
     19         self.finalizer = None
---> 20         self._launch(process_obj)
     21 
     22     def duplicate_for_child(self, fd):

~/anaconda3/lib/python3.7/multiprocessing/popen_fork.py in _launch(self, process_obj)
     67     def _launch(self, process_obj):
     68         code = 1
---> 69         parent_r, child_w = os.pipe()
     70         self.pid = os.fork()
     71         if self.pid == 0:

OSError: [Errno 24] Too many open files

有关此问题的原因和解决方案的任何想法?

以下代码根据 CJR 建议解决了我的问题

pool = mp.Pool(processes=12)
pool.starmap(create_image, week_list)
pool.close()

暂无
暂无

声明:本站的技术帖子网页,遵循CC BY-SA 4.0协议,如果您需要转载,请注明本站网址或者原文地址。任何问题请咨询:yoyou2525@163.com.

 
粤ICP备18138465号  © 2020-2024 STACKOOM.COM