[英]Learning Python concurrency with partial
嗨,我正在尝试通过 ThreadPoolExecutor 学习 Python 并发,想了解如何借助 partial 和 map 把同一份数据传给多个处理这份数据的函数。我写了一个简单的示例,如下所示。我知道只用一个函数(而不是两个)也能完成这件事,但我想从概念上弄明白,所以才举了这个简单的例子。
import os
import threading
import timeit
from itertools import islice
import concurrent.futures
import string
from functools import partial
# Folder that contains the input file (placeholder value).
path = "C:/Users/some_folder"
input_file_name = os.path.join(path, "input_file_example.txt")  # A very large file
## Function to count characters in a string
def count_chars(ip):
    """Return the number of characters in ``ip``.

    Every character counts, including whitespace and a trailing newline.
    """
    return len(ip)
## Function to count words in a string
def count_words(ip):
    """Return how many whitespace-separated tokens of ``ip`` are words.

    A token counts as a word when, after stripping leading and trailing
    punctuation, it consists only of alphabetic characters.
    """
    # Return the computed total; the builtin ``sum`` itself must not leak out.
    return sum(token.strip(string.punctuation).isalpha() for token in ip.split())
## Divide a very large file in chunks for reading
def read_in_chunks(file_object, lines_per_chunk):
    """Yield successive lists of at most ``lines_per_chunk`` items.

    ``file_object`` may be an open text file or any other iterable of lines.
    The generator stops once the input is exhausted; an empty chunk is never
    yielded.
    """
    # iter() returns a file object unchanged, but makes a plain sequence
    # advance between chunks instead of restarting from its first item.
    line_iter = iter(file_object)
    while True:
        lines = list(islice(line_iter, lines_per_chunk))
        if not lines:
            break
        yield lines
# Functions that must each receive the same line.
all_funcs = [count_chars, count_words]
data = []
# Create the pool once and reuse it for every chunk instead of rebuilding it
# inside the loop.
with concurrent.futures.ThreadPoolExecutor(max_workers=3) as executor:
    with open(input_file_name) as f:
        for piece in read_in_chunks(f, 10):
            print(piece)
            for line in piece:
                # partial(func, line) binds the shared argument, so each task
                # can be invoked with no arguments by map().
                tasks = [partial(func, line) for func in all_funcs]
                data.extend(executor.map(lambda task: task(), tasks))
print(data)
第一:您可以在open()
之前创建ThreadPoolExecutor
并且您可以(重新)多次使用它而无需一次又一次地(重新)创建它。
map
是用不同的数据运行同一个函数,而你却想用相同的数据运行不同的函数。
我宁愿创建一个同时运行count_chars
和count_words
的 function 并将此函数与map()
和不同的行一起使用。
def func(line):
    """Return ``(character_count, word_count)`` for a single line."""
    a = count_chars(line)
    b = count_words(line)
    return a, b
results = []
# The pool is created once, before the file is opened, and reused for every chunk.
with concurrent.futures.ThreadPoolExecutor(max_workers=3) as executor:
    with open(input_file_name) as f:
        for piece in read_in_chunks(f, 10):
            # Each chunk is already a list, so it goes to map() directly;
            # map() yields one (chars, words) pair per line, in input order.
            results.extend(executor.map(func, piece))
print(results)
它给出的是由成对结果组成的列表
[(count_chars, count_words), (count_chars, count_words), ...]
所以很容易得到单对。
最终我可以使用zip()
创建包含所有count_chars
的分隔列表和包含所有count_words
的分隔列表
最小可运行示例代码:
import string
import concurrent.futures
from itertools import islice
#from functools import partial
## Function to count characters in a string
def count_chars(line):
    """Return the number of characters in ``line``, whitespace included."""
    return len(line)
## Function to count words in a string
def count_words(line):
    """Return how many whitespace-separated tokens of ``line`` are words.

    A token counts as a word when, after stripping leading and trailing
    punctuation, it consists only of alphabetic characters.
    """
    return sum(word.strip(string.punctuation).isalpha() for word in line.split())
## Divide a very large file in chunks for reading
def read_in_chunks(file_object, lines_per_chunk):
    """Yield successive lists of at most ``lines_per_chunk`` items.

    ``file_object`` may be an open text file or any other iterable of lines.
    The generator stops once the input is exhausted; an empty chunk is never
    yielded.
    """
    # iter() returns a file object unchanged, but makes a plain sequence
    # advance between chunks instead of restarting from its first item.
    line_iter = iter(file_object)
    while True:
        lines = list(islice(line_iter, lines_per_chunk))
        if not lines:
            break
        yield lines
# Path of the text file that the code below reads line by line.
input_file_name = 'pool-multifunctions.py'
# --- version 1 ----
def func(line):
    """Return ``(character_count, word_count)`` for a single line."""
    a = count_chars(line)
    b = count_words(line)
    return a, b
results = []
# The pool is created once, before the file is opened, and reused for every chunk.
with concurrent.futures.ThreadPoolExecutor(max_workers=3) as executor:
    with open(input_file_name) as f:
        for piece in read_in_chunks(f, 10):
            #print(piece)
            # Each chunk is already a list; map() yields one (chars, words)
            # pair per line, in input order.
            results.extend(executor.map(func, piece))
print('--- results ---')
print(results)
# zip(*results) turns the list of pairs into two parallel tuples. An empty
# file yields no pairs, and unpacking zip() of nothing would raise ValueError.
if results:
    all_count_chars, all_count_words = zip(*results)
else:
    all_count_chars, all_count_words = (), ()
print('--- all_count_chars ---')
print(all_count_chars)
print('--- all_count_words ---')
print(all_count_words)
其他版本:
我创建对
all_pairs = [(count_chars, line), (count_words, line)]
并运行它们
lambda x:x[0](x[1])
其中x[0]
将是函数的名称,而x[1]
将是line
这样就不需要用到 partial 了。
它给出的是一个扁平(未分组)的列表
[count_chars, count_words, count_chars, count_words, ...]
所以要得到单对并不容易。
要创建包含所有count_chars
的分隔列表和包含所有count_words
的分隔列表,它需要results[0::2]
和results[1::2]
import string
import concurrent.futures
from itertools import islice
#from functools import partial
## Function to count characters in a string
def count_chars(line):
    """Return the number of characters in ``line``, whitespace included."""
    return len(line)
## Function to count words in a string
def count_words(line):
    """Return how many whitespace-separated tokens of ``line`` are words.

    A token counts as a word when, after stripping leading and trailing
    punctuation, it consists only of alphabetic characters.
    """
    return sum(word.strip(string.punctuation).isalpha() for word in line.split())
## Divide a very large file in chunks for reading
def read_in_chunks(file_object, lines_per_chunk):
    """Yield successive lists of at most ``lines_per_chunk`` items.

    ``file_object`` may be an open text file or any other iterable of lines.
    The generator stops once the input is exhausted; an empty chunk is never
    yielded.
    """
    # iter() returns a file object unchanged, but makes a plain sequence
    # advance between chunks instead of restarting from its first item.
    line_iter = iter(file_object)
    while True:
        lines = list(islice(line_iter, lines_per_chunk))
        if not lines:
            break
        yield lines
input_file_name = 'pool-multifunctions.py'
# --- version 2 ---
results = []
with concurrent.futures.ThreadPoolExecutor(max_workers=3) as executor:
    with open(input_file_name) as f:
        for piece in read_in_chunks(f, 10):
            for line in piece:
                # Each task is a (function, argument) pair; the lambda
                # applies the function to its argument.
                all_pairs = [(count_chars, line), (count_words, line)]
                results.extend(executor.map(lambda x: x[0](x[1]), all_pairs))
print('--- results ---')
print(results)
# Results alternate chars, words, chars, words, ... so even and odd
# positions separate the two kinds of count.
all_count_chars = results[0::2]
all_count_words = results[1::2]
print('--- all_count_chars ---')
print(all_count_chars)
print('--- all_count_words ---')
print(all_count_words)
编辑:
我发现也可以这样写:
# map() takes one iterable per lambda parameter and pairs their items positionally.
results += list(executor.map(lambda func,data:func(data), [count_chars, count_words], [line, line]))
或者
all_funcs = [count_chars, count_words]
# Repeat the same line once per function so both iterables have equal length.
all_data = [line] * len(all_funcs)
results += list(executor.map(lambda func,data:func(data), all_funcs, all_data))
import string
import concurrent.futures
from itertools import islice
#from functools import partial
## Function to count characters in a string
def count_chars(line):
    """Return the number of characters in ``line``, whitespace included."""
    return len(line)
## Function to count words in a string
def count_words(line):
    """Return how many whitespace-separated tokens of ``line`` are words.

    A token counts as a word when, after stripping leading and trailing
    punctuation, it consists only of alphabetic characters.
    """
    return sum(word.strip(string.punctuation).isalpha() for word in line.split())
## Divide a very large file in chunks for reading
def read_in_chunks(file_object, lines_per_chunk):
    """Yield successive lists of at most ``lines_per_chunk`` items.

    ``file_object`` may be an open text file or any other iterable of lines.
    The generator stops once the input is exhausted; an empty chunk is never
    yielded.
    """
    # iter() returns a file object unchanged, but makes a plain sequence
    # advance between chunks instead of restarting from its first item.
    line_iter = iter(file_object)
    while True:
        lines = list(islice(line_iter, lines_per_chunk))
        if not lines:
            break
        yield lines
input_file_name = 'temp-pool-multifunctions.py'
# --- version 3 ---
results = []
all_funcs = (count_chars, count_words)
with concurrent.futures.ThreadPoolExecutor(max_workers=3) as executor:
    with open(input_file_name) as f:
        for piece in read_in_chunks(f, 10):
            for line in piece:
                # Equivalent literal form:
                #   executor.map(lambda func, data: func(data),
                #                [count_chars, count_words], [line, line])
                # Repeat the line once per function so map() can pair each
                # function with its argument.
                all_data = [line] * len(all_funcs)
                results.extend(
                    executor.map(lambda func, data: func(data), all_funcs, all_data)
                )
print('--- results ---')
print(results)
# Results alternate chars, words, chars, words, ... so even and odd
# positions separate the two kinds of count.
all_count_chars = results[0::2]
all_count_words = results[1::2]
print('--- all_count_chars ---')
print(all_count_chars)
print('--- all_count_words ---')
print(all_count_words)
声明:本站的技术帖子网页,遵循CC BY-SA 4.0协议,如果您需要转载,请注明本站网址或者原文地址。任何问题请咨询:yoyou2525@163.com.