[英]Learning Python concurrency with partial
嗨,我正在嘗試使用 ThreadPoolExecutor 學習 Python 並發,並希望了解如何借助 partial 和 map 將同一份數據傳遞給多個處理該數據的函數。我嘗試創建了一個簡單的示例,如下所示。我知道這可以只用 1 個函數而不是 2 個來完成,但我想從概念上理解它,所以試着舉一個簡單的例子。
import os
import threading
import timeit
from itertools import islice
import concurrent.futures
import string
from functools import partial
# Directory that contains the input file (hard-coded for this example).
path = "C:/Users/some_folder"
# Full path of the text file that the script reads chunk by chunk.
input_file_name = os.path.join(path, "input_file_example.txt") ## A very large file
## Function to count characters in a string
def count_chars(ip):
    """Return the number of characters in the string ``ip``.

    The result is simply ``len(ip)``, so whitespace and a trailing
    newline (if present) are counted as well.
    """
    return len(ip)
## Function to count words in a string
def count_words(ip):
    """Return how many whitespace-separated tokens of ``ip`` are words.

    A token counts as a word when, after stripping leading and trailing
    ASCII punctuation (``string.punctuation``), the remainder is non-empty
    and purely alphabetic.  Punctuation inside a token (e.g. ``don't``)
    therefore disqualifies it.
    """
    # The original computed this sum, threw it away, and returned the
    # builtin ``sum`` function object instead of the count.
    return sum(token.strip(string.punctuation).isalpha() for token in ip.split())
## Divide a very large file in chunks for reading
def read_in_chunks(file_object, lines_per_chunk):
    """Yield successive lists of at most ``lines_per_chunk`` lines.

    ``file_object`` is any iterable of lines, typically an open text file.
    It is consumed lazily, so only one chunk is held in memory at a time.
    The last chunk may be shorter; an empty chunk is never yielded.  With
    ``lines_per_chunk`` equal to 0 nothing is yielded at all.
    """
    # iter() is a no-op for file objects and other iterators, but makes a
    # plain list safe too: without it islice() would restart from the first
    # element on every pass and the loop would never terminate.
    line_iter = iter(file_object)
    while True:
        lines = list(islice(line_iter, lines_per_chunk))
        if not lines:
            return
        yield lines
# Functions that must all be applied to the same line of text.
all_funcs = [count_chars, count_words]
data = []
# Create the pool once and reuse it for every chunk instead of building a
# new ThreadPoolExecutor per chunk.
with concurrent.futures.ThreadPoolExecutor(max_workers=3) as executor:
    with open(input_file_name) as f:
        for piece in read_in_chunks(f, 10):
            print(piece)
            for line in piece:
                # partial(func, line) binds the data, producing zero-argument
                # callables.  The original partial(func) bound nothing, so
                # calling x() raised TypeError for the missing argument.
                tasks = [partial(func, line) for func in all_funcs]
                # map() preserves input order: for each line the character
                # count is appended first, then the word count.
                data.extend(executor.map(lambda task: task(), tasks))
print(data)
第一:您可以在 open() 之前創建 ThreadPoolExecutor,這樣就可以多次重複使用它,而無需一次又一次地重新創建。
map() 是用不同的數據運行同一個函數,但你嘗試的是用相同的數據運行不同的函數。
我寧願創建一個同時運行 count_chars 和 count_words 的函數,並將此函數與 map() 一起用於不同的行。
def func(line):
    """Return the pair ``(count_chars(line), count_words(line))``.

    Bundling both measurements into one callable lets ``Executor.map``
    run it over many different lines, which is what ``map`` is for.
    """
    return count_chars(line), count_words(line)
results = []
# One pool, created before the file is opened and reused for every chunk.
with concurrent.futures.ThreadPoolExecutor(max_workers=3) as executor:
    with open(input_file_name) as f:
        for piece in read_in_chunks(f, 10):
            # piece is already a list, so no extra copy is needed; map()
            # yields one (chars, words) pair per line, in input order.
            results.extend(executor.map(func, piece))
print(results)
它給出由數對組成的列表
[(count_chars, count_words), (count_chars, count_words), ...]
所以很容易得到單對。
最後,我可以使用 zip() 分別創建一個包含所有 count_chars 結果的列表和一個包含所有 count_words 結果的列表。
最小可運行代碼:
import string
import concurrent.futures
from itertools import islice
#from functools import partial
## Function to count characters in a string
def count_chars(line: str) -> int:
    """Return the length of ``line``, counting every character in it."""
    return len(line)
## Function to count words in a string
def count_words(line: str) -> int:
    """Return the number of alphabetic words in ``line``.

    The line is split on whitespace; each token has leading and trailing
    ASCII punctuation stripped and is counted only if what remains is
    non-empty and purely alphabetic.
    """
    return sum(
        word.strip(string.punctuation).isalpha() for word in line.split()
    )
## Divide a very large file in chunks for reading
def read_in_chunks(file_object, lines_per_chunk):
    """Yield lists of up to ``lines_per_chunk`` lines from ``file_object``.

    ``file_object`` is any iterable of lines (usually an open text file)
    and is read lazily.  Iteration ends when the input is exhausted; the
    final chunk may be shorter and an empty chunk is never produced.
    """
    # Normalise to an iterator so a plain list is consumed progressively
    # rather than re-read from the start by every islice() call.
    line_iter = iter(file_object)
    while True:
        lines = list(islice(line_iter, lines_per_chunk))
        if not lines:
            return
        yield lines
# Text file to analyse, resolved relative to the current working directory.
# NOTE(review): presumably this is the example script itself, used as handy sample input; confirm.
input_file_name = 'pool-multifunctions.py'
# --- version 1 ----
def func(line):
    """Return ``(character_count, word_count)`` for one line of text.

    Combines ``count_chars`` and ``count_words`` so that a single
    ``Executor.map`` call can process many lines.
    """
    return count_chars(line), count_words(line)
results = []
with concurrent.futures.ThreadPoolExecutor(max_workers=3) as executor:
    with open(input_file_name) as f:
        for piece in read_in_chunks(f, 10):
            # One (chars, words) pair per line, in the same order as the file.
            results.extend(executor.map(func, piece))

print('--- results ---')
print(results)

# Transpose the list of pairs into two parallel tuples.  zip(*[]) yields
# nothing, so unpacking it would raise ValueError for an empty input file;
# fall back to two empty tuples in that case.
all_count_chars, all_count_words = zip(*results) if results else ((), ())

print('--- all_count_chars ---')
print(all_count_chars)
print('--- all_count_words ---')
print(all_count_words)
其他版本:
我創建(函數, 數據)數對
all_pairs = [(count_chars, line), (count_words, line)]
並運行它們
lambda x:x[0](x[1])
其中 x[0] 是函數本身,而 x[1] 是 line。這樣我就不需要使用 partial。
它給出的是扁平列表
[count_chars, count_words, count_chars, count_words, ...]
所以要得到單對並不容易。
要分別創建包含所有 count_chars 結果的列表和包含所有 count_words 結果的列表,需要使用 results[0::2] 和 results[1::2]
import string
import concurrent.futures
from itertools import islice
#from functools import partial
## Function to count characters in a string
def count_chars(line: str) -> int:
    """Return how many characters ``line`` contains (``len(line)``)."""
    return len(line)
## Function to count words in a string
def count_words(line: str) -> int:
    """Count the whitespace-separated tokens of ``line`` that are words.

    A token is a word if it is non-empty and purely alphabetic once
    leading and trailing ASCII punctuation has been stripped.
    """
    return sum(
        word.strip(string.punctuation).isalpha() for word in line.split()
    )
## Divide a very large file in chunks for reading
def read_in_chunks(file_object, lines_per_chunk):
    """Lazily split an iterable of lines into lists of ``lines_per_chunk``.

    Each yielded list holds at most ``lines_per_chunk`` lines; the last
    one may be shorter.  Nothing is yielded for an empty input.
    """
    # iter() leaves file objects untouched but stops a plain list from
    # being restarted by each islice() call, which would loop forever.
    line_iter = iter(file_object)
    while True:
        lines = list(islice(line_iter, lines_per_chunk))
        if not lines:
            return
        yield lines
# Text file to analyse, resolved relative to the current working directory.
# NOTE(review): presumably this is the example script itself, used as handy sample input; confirm.
input_file_name = 'pool-multifunctions.py'
# --- version 2 ---
results = []
with concurrent.futures.ThreadPoolExecutor(max_workers=3) as executor:
    with open(input_file_name) as f:
        for piece in read_in_chunks(f, 10):
            for line in piece:
                # Pair each function with the data it should receive; the
                # lambda then calls pair[0] (the function) on pair[1] (the line).
                all_pairs = [(count_chars, line), (count_words, line)]
                # Each map() call is fully drained before the next line is
                # submitted, so at most len(all_pairs) tasks run at once.
                results.extend(
                    executor.map(lambda pair: pair[0](pair[1]), all_pairs)
                )

print('--- results ---')
print(results)

# results is flat: [chars, words, chars, words, ...].  Even indexes hold
# the character counts and odd indexes the word counts.
all_count_chars = results[0::2]
all_count_words = results[1::2]

print('--- all_count_chars ---')
print(all_count_chars)
print('--- all_count_words ---')
print(all_count_words)
編輯:
我發現它可以用作
results += list(executor.map(lambda func,data:func(data), [count_chars, count_words], [line, line]))
或者
all_funcs = [count_chars, count_words]
all_data = [line] * len(all_funcs)
results += list(executor.map(lambda func,data:func(data), all_funcs, all_data))
import string
import concurrent.futures
from itertools import islice
#from functools import partial
## Function to count characters in a string
def count_chars(line: str) -> int:
    """Return the character count of ``line``, whitespace included."""
    return len(line)
## Function to count words in a string
def count_words(line: str) -> int:
    """Return the number of purely alphabetic tokens in ``line``.

    Tokens are obtained by splitting on whitespace, and leading and
    trailing ASCII punctuation is stripped before the alphabetic check.
    """
    return sum(
        word.strip(string.punctuation).isalpha() for word in line.split()
    )
## Divide a very large file in chunks for reading
def read_in_chunks(file_object, lines_per_chunk):
    """Generate consecutive chunks of lines from ``file_object``.

    Every chunk is a list of at most ``lines_per_chunk`` lines taken in
    order from ``file_object`` (any iterable of lines).  Generation stops
    at the first empty chunk, which is not yielded.
    """
    # Without iter(), a non-iterator such as a list would be sliced from
    # its beginning on every pass and the generator would never finish.
    line_iter = iter(file_object)
    while True:
        lines = list(islice(line_iter, lines_per_chunk))
        if not lines:
            return
        yield lines
# Text file to analyse, resolved relative to the current working directory.
# NOTE(review): presumably this is the example script itself, used as handy sample input; confirm.
input_file_name = 'temp-pool-multifunctions.py'
# --- version 3 ---
results = []
all_funcs = (count_chars, count_words)
with concurrent.futures.ThreadPoolExecutor(max_workers=3) as executor:
    with open(input_file_name) as f:
        for piece in read_in_chunks(f, 10):
            for line in piece:
                # Executor.map pairs its iterables element-wise, so each
                # function needs its own copy of the line.  This is the
                # general form of
                # map(call, [count_chars, count_words], [line, line]).
                all_data = [line] * len(all_funcs)
                results.extend(
                    executor.map(
                        lambda func, data: func(data), all_funcs, all_data
                    )
                )

print('--- results ---')
print(results)

# results is flat and interleaved in all_funcs order, so striding by the
# number of functions separates the per-function series.
stride = len(all_funcs)
all_count_chars = results[0::stride]
all_count_words = results[1::stride]

print('--- all_count_chars ---')
print(all_count_chars)
print('--- all_count_words ---')
print(all_count_words)
聲明:本站的技術帖子網頁,遵循CC BY-SA 4.0協議,如果您需要轉載,請注明本站網址或者原文地址。任何問題請咨詢:yoyou2525@163.com.