
How can I test concurrency using ThreadPoolExecutor?

I have written a program that I would like to use concurrency on, and I have implemented it, but I am having a hard time testing whether it will actually spin up more than one thread. Does anyone have any suggestions? I just want to see whether this code will ever use 2, 3, 4, or 5 workers.

import concurrent.futures
import glob
import logging
import os
import time
import urllib.error

import pandas as pd
from pandas.errors import ParserError


def read_files():
    t0 = time.process_time()
    cols = ['fname', ' lname', ' age']
    path = 'data'
    files = glob.glob(os.path.join(path, "*.csv"))
    # with open('data/url') as f:
    #     for line in f:
    #         files.append(line.rstrip('\n'))
    bad_files = []
    df_list = []
    for file in files:
        try:
            temp = pd.read_csv(file)
            if temp.columns.to_list() == cols:
                df_list.append(temp)
            else:
                bad_files.append(file)
        except ParserError as pe:
            bad_files.append(file)
            logging.error(f'Parsing Error on {file}. Error: {pe}')
        except ValueError as ve:
            logging.error(f'Value error on reading the csv {file}, error: {ve}')
            bad_files.append(file)
        except urllib.error.HTTPError as he:
            bad_files.append(file)
            logging.error(f'Http Error {he}, Code {he.code}')
        except Exception as e:
            bad_files.append(file)
            logging.error(f'Error grabbing data from given {file} possible HTTP error. Error: {e}')

    print(f'Files that were not read {bad_files}')
    df = pd.concat(df_list)
    t1 = time.process_time()
    print(f'It took {t1 - t0} seconds, to read and fill the dataframe.')
    return df


def run_calculations(df):
    if len(df.index) % 2 == 0:
        print(f'Even number of entries, pandas median() method will add both middle numbers and find the average.')

    average = round(df[' age'].mean())
    median = df[' age'].median()
    names_arr = df[df[' age'] == median].values[0]
    fname = names_arr[0]
    lname = names_arr[1]
    print(f'The Average Age is {int(average)}, The Median Age is {int(median)}. {fname} {lname} is {int(median)}')


if __name__ == '__main__':
    with concurrent.futures.ThreadPoolExecutor(max_workers=5) as executor:
        executor.submit(run_calculations(read_files()))
        print(f'I have used {len(executor._threads)} thread(s) for processing')

The short answer is: no, your code will not use more than one worker. The reason is that you passed a wrongly-typed argument to executor.submit, which expects a callable: run_calculations(read_files()) is evaluated immediately in the main thread, so what actually gets submitted is its return value, which is None.

A quick fix would be to replace executor.submit(run_calculations(read_files())) with executor.submit(lambda: run_calculations(read_files())).
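A cleaner alternative to the lambda is to pass the callable and its arguments to submit separately, which is the form the API is designed for. A minimal sketch (the work function and its argument are made up for illustration):

```python
import concurrent.futures


def work(x):
    return x * 2


with concurrent.futures.ThreadPoolExecutor(max_workers=5) as executor:
    # submit() takes the callable itself plus its arguments; it schedules
    # work(21) on a worker thread and returns a Future immediately.
    future = executor.submit(work, 21)

print(future.result())  # 42
```

Note that for the original code, executor.submit(run_calculations, read_files()) would still evaluate read_files() in the main thread before submitting; only run_calculations itself would run on a worker.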

The following snippet helps explain how to submit a callable to the executor:

import time
import threading
from concurrent.futures import ThreadPoolExecutor


def task(time_to_sleep):
    time.sleep(time_to_sleep)
    print(id(threading.current_thread()))


def use_single_worker():
    print("in use single worker")
    with ThreadPoolExecutor(max_workers=5) as executor:
        # a single thread id will get dumped multi times
        futures = [executor.submit(task(i)) for i in range(10)]
        for future in futures:
            try:
                future.result()
            except Exception:
                pass


def use_multiple_workers():
    print("in use multiple workers")
    with ThreadPoolExecutor(max_workers=5) as executor:
        # different thread ids will get dumped; bind i as a default argument,
        # otherwise every lambda would see the loop's final value of i
        futures = [executor.submit(lambda i=i: task(i)) for i in range(10)]
        for future in futures:
            try:
                future.result()
            except Exception:
                pass


if __name__ == '__main__':
    use_single_worker()
    use_multiple_workers()
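To answer the original question of counting workers without reading the private executor._threads attribute, each task can report the id of the thread it ran on. A sketch, where the task body and the 0.1-second sleep are chosen only to keep workers busy long enough for tasks to overlap:

```python
import threading
import time
from concurrent.futures import ThreadPoolExecutor


def task(seconds):
    time.sleep(seconds)  # keep this worker busy so other tasks overlap
    return threading.get_ident()  # id of the thread that ran this task


with ThreadPoolExecutor(max_workers=5) as executor:
    futures = [executor.submit(task, 0.1) for _ in range(10)]
    thread_ids = {f.result() for f in futures}

print(f'{len(thread_ids)} distinct worker thread(s) were used')
```

Because each task blocks for 0.1 seconds, the pool has to spin up additional workers to drain the queue, so the set should contain more than one id.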
