繁体   English   中英

使用 partial 学习 Python 并发

[英]Learning Python concurrency with partial

嗨,我正在尝试使用 ThreadPoolExecutor 学习 Python 并发,并希望了解如何借助 partial 和 map 将相同的数据传递给处理同一份数据的多个函数。我尝试创建了一个简单的示例,如下所示。我知道这只用 1 个函数而不是 2 个就可以完成,但我想从概念上理解它,所以试着举了一个简单的例子。

import os
import threading
import timeit
from itertools import islice
import concurrent.futures
import string
from functools import partial

# Directory that contains the input file (hard-coded example location).
path = "C:/Users/some_folder"

# Full path of the file that is read chunk by chunk further down.
input_file_name = os.path.join(path, "input_file_example.txt")    ## A very large file

## Function to count characters in a string
def count_chars(ip):
    """Return the number of characters in the string ``ip``."""
    return len(ip)


## Function to count words in a string
def count_words(ip):
    """Return how many whitespace-separated tokens of ``ip`` are words.

    A token counts as a word when, after leading and trailing punctuation
    is stripped, it consists solely of alphabetic characters.
    """
    # Return the computed total. Previously the total was thrown away and
    # the built-in ``sum`` function object itself was returned.
    return sum(
        token.strip(string.punctuation).isalpha() for token in ip.split()
    )


## Divide a very large file in chunks for reading
def read_in_chunks(file_object, lines_per_chunk):
    """Yield lists of at most ``lines_per_chunk`` items taken from ``file_object``.

    Items are pulled with ``islice``, so ``file_object`` is consumed as the
    generator advances. Iteration ends at the first empty chunk.
    """
    chunk = list(islice(file_object, lines_per_chunk))
    while chunk:
        yield chunk
        chunk = list(islice(file_object, lines_per_chunk))


# Callables applied to every line; each one takes the line as its only argument.
all_funcs = [count_chars, count_words]
data = []


# Create the pool once and reuse it for every chunk instead of rebuilding
# it for each chunk read from the file.
with concurrent.futures.ThreadPoolExecutor(max_workers=3) as executor:
    with open(input_file_name) as f:
        for piece in read_in_chunks(f, 10):
            print(piece)
            for line in piece:
                # Bind the current line to each function so the pool gets
                # zero-argument tasks. Calling the functions without the
                # line raises TypeError (missing required argument).
                tasks = [partial(func, line) for func in all_funcs]
                data.extend(executor.map(lambda task: task(), tasks))
            print(data)

第一:您可以在 open() 之前创建 ThreadPoolExecutor,这样就可以多次重复使用它,而无需一次又一次地重新创建。


map() 用于对不同的数据运行同一个函数,而你是想对相同的数据运行不同的函数。

我更倾向于创建一个同时运行 count_chars 和 count_words 的函数,然后把这个函数与 map() 一起用于不同的行。

def func(line):
    """Return the pair ``(count_chars(line), count_words(line))`` for one line."""
    return count_chars(line), count_words(line)

# Accumulates one result of func() per line, in the order the lines are read.
results = []

with concurrent.futures.ThreadPoolExecutor(max_workers=3) as executor, \
        open(input_file_name) as f:
    for piece in read_in_chunks(f, 10):
        # Materialise the chunk's results before appending them.
        pairs = list(executor.map(func, piece))
        results.extend(pairs)

print(results)

它给出一个由数对组成的列表

[(count_chars, count_words), (count_chars, count_words), ...] 

所以很容易得到单对。

最后我可以使用 zip() 创建一个包含所有 count_chars 结果的单独列表,以及一个包含所有 count_words 结果的单独列表


最小可运行代码。

import string
import concurrent.futures
from itertools import islice
#from functools import partial


## Function to count characters in a string
def count_chars(line):
    """Return the length of ``line`` in characters."""
    length = len(line)
    return length


## Function to count words in a string
def count_words(line):
    """Count the whitespace-separated tokens of ``line`` that are words.

    A token is a word when it is purely alphabetic once leading and
    trailing punctuation has been stripped.
    """
    total = 0
    for token in line.split():
        if token.strip(string.punctuation).isalpha():
            total += 1
    return total


## Divide a very large file in chunks for reading
def read_in_chunks(file_object, lines_per_chunk):
    """Yield lists of at most ``lines_per_chunk`` items taken from ``file_object``.

    Items are pulled with ``islice``, so ``file_object`` is consumed as the
    generator advances. Iteration ends at the first empty chunk.
    """
    while True:
        batch = list(islice(file_object, lines_per_chunk))
        if not batch:
            return
        yield batch


# Path of the text file whose lines are processed below.
input_file_name = 'pool-multifunctions.py'

# --- version 1 ----

def func(line):
    """Return the pair ``(count_chars(line), count_words(line))`` for one line."""
    return count_chars(line), count_words(line)

# Accumulates one (count_chars, count_words) pair per line, in file order.
results = []

with concurrent.futures.ThreadPoolExecutor(max_workers=3) as executor:
    with open(input_file_name) as f:
        for piece in read_in_chunks(f, 10):
            results += list(executor.map(func, piece))

print('--- results ---')
print(results)

# zip(*results) yields nothing when the file is empty, and unpacking that
# into two names raises ValueError; fall back to two empty tuples instead.
all_count_chars, all_count_words = zip(*results) if results else ((), ())

print('--- all_count_chars ---')
print(all_count_chars)
print('--- all_count_words ---')
print(all_count_words)

其他版本:

我创建(函数, 数据)对

 all_pairs = [(count_chars, line), (count_words, line)]

并运行它们

 lambda x:x[0](x[1])

其中x[0]将是函数的名称,而x[1]将是line

这样我就不需要使用 partial

它给出了平面列表

[count_chars, count_words, count_chars, count_words, ...] 

所以要得到单对并不容易。

要创建包含所有 count_chars 结果的单独列表和包含所有 count_words 结果的单独列表,需要使用 results[0::2] 和 results[1::2]

import string
import concurrent.futures
from itertools import islice
#from functools import partial


## Function to count characters in a string
def count_chars(line):
    """Return how many characters ``line`` contains."""
    n_chars = len(line)
    return n_chars


## Function to count words in a string
def count_words(line):
    """Count the whitespace-separated tokens of ``line`` that are words.

    A token is a word when it is purely alphabetic once leading and
    trailing punctuation has been stripped.
    """
    words = [
        token
        for token in line.split()
        if token.strip(string.punctuation).isalpha()
    ]
    return len(words)


## Divide a very large file in chunks for reading
def read_in_chunks(file_object, lines_per_chunk):
    """Yield lists of at most ``lines_per_chunk`` items taken from ``file_object``.

    Items are pulled with ``islice``, so ``file_object`` is consumed as the
    generator advances. Iteration ends at the first empty chunk.
    """
    chunk = list(islice(file_object, lines_per_chunk))
    while chunk:
        yield chunk
        chunk = list(islice(file_object, lines_per_chunk))


# Path of the text file whose lines are processed below.
input_file_name = 'pool-multifunctions.py'

# --- version 2 ---

# Flat list of results: for every line, count_chars then count_words.
results = []

with concurrent.futures.ThreadPoolExecutor(max_workers=3) as executor, \
        open(input_file_name) as f:
    for piece in read_in_chunks(f, 10):
        for line in piece:
            # Each task is a (function, argument) pair; the lambda applies
            # the first element to the second.
            all_pairs = [(count_chars, line), (count_words, line)]
            outcomes = list(executor.map(lambda pair: pair[0](pair[1]), all_pairs))
            results.extend(outcomes)

print('--- results ---')
print(results)

# Results alternate chars/words, so even indexes hold the character counts
# and odd indexes hold the word counts.
all_count_chars = results[::2]
all_count_words = results[1::2]

print('--- all_count_chars ---')
print(all_count_chars)
print('--- all_count_words ---')
print(all_count_words)

编辑:

我发现它可以用作

results += list(executor.map(lambda func,data:func(data), [count_chars, count_words], [line, line]))

或者

# Functions to run against the same line.
all_funcs = [count_chars, count_words]
# Repeat the line once per function so map() can pair them up positionally.
all_data = [line] * len(all_funcs)

# map() calls the lambda with one item from each iterable: (function, line).
results += list(executor.map(lambda func,data:func(data), all_funcs, all_data))

import string
import concurrent.futures
from itertools import islice
#from functools import partial


## Function to count characters in a string
def count_chars(line):
    """Return the character count of ``line``."""
    char_total = len(line)
    return char_total


## Function to count words in a string
def count_words(line):
    """Count the whitespace-separated tokens of ``line`` that are words.

    A token is a word when it is purely alphabetic once leading and
    trailing punctuation has been stripped.
    """
    tokens = line.split()
    return sum(
        1 for token in tokens if token.strip(string.punctuation).isalpha()
    )


## Divide a very large file in chunks for reading
def read_in_chunks(file_object, lines_per_chunk):
    """Yield lists of at most ``lines_per_chunk`` items taken from ``file_object``.

    Items are pulled with ``islice``, so ``file_object`` is consumed as the
    generator advances. Iteration ends at the first empty chunk.
    """
    def next_chunk():
        return list(islice(file_object, lines_per_chunk))

    # iter(callable, sentinel) keeps calling next_chunk() until it returns
    # a value equal to the sentinel, i.e. an empty list.
    for chunk in iter(next_chunk, []):
        yield chunk


# Path of the text file whose lines are processed below.
input_file_name = 'temp-pool-multifunctions.py'

# --- version 3 ---

# Flat list of results: for every line, one value per function in all_funcs.
results = []
all_funcs = (count_chars, count_words)

with concurrent.futures.ThreadPoolExecutor(max_workers=3) as executor:
    with open(input_file_name) as f:
        for piece in read_in_chunks(f, 10):
            for line in piece:
                # Repeat the line once per function so map() pairs every
                # function with the same data.
                all_data = [line] * len(all_funcs)
                results += list(executor.map(lambda func, data: func(data), all_funcs, all_data))

print('--- results ---')
print(results)

# Each line contributes len(all_funcs) consecutive results, so the slice
# stride follows the number of functions rather than a hard-coded 2.
all_count_chars = results[0::len(all_funcs)]
all_count_words = results[1::len(all_funcs)]

print('--- all_count_chars ---')
print(all_count_chars)
print('--- all_count_words ---')
print(all_count_words)

暂无
暂无

声明:本站的技术帖子网页,遵循CC BY-SA 4.0协议,如果您需要转载,请注明本站网址或者原文地址。任何问题请咨询:yoyou2525@163.com.

 
粤ICP备18138465号  © 2020-2024 STACKOOM.COM