簡體   English   中英

使用 partial 學習 Python 並發

[英]Learning Python concurrency with partial

嗨,我正在嘗試使用 ThreadPoolExecutor 學習 Python 並發,並希望了解如何借助 partial 和 map 將相同的數據傳遞給多個處理同一份數據的函數。 我嘗試創建了一個簡單的示例,如下所示。 我知道這可以只用 1 個函數而不是 2 個來完成,但我想從概念上理解它,所以試着舉一個簡單的例子。

import os
import threading
import timeit
from itertools import islice
import concurrent.futures
import string
from functools import partial

# Directory that holds the input file.
path = "C:/Users/some_folder"

# Full path of the text file to process, built from the directory above.
input_file_name = os.path.join(path, "input_file_example.txt")    ## A very large file

## Function to count characters in a string
def count_chars(ip):
    """Return the number of characters in the string ``ip``."""
    return len(ip)


## Function to count words in a string
def count_words(ip):
    """Return how many whitespace-separated tokens of ``ip`` are words.

    A token counts as a word when, after stripping leading and trailing
    punctuation, what remains consists only of alphabetic characters.

    Args:
        ip: The string to analyse.

    Returns:
        The number of matching tokens as an ``int`` (0 for an empty string).
    """
    # The total must be returned; previously it was computed and thrown
    # away, and the built-in ``sum`` function object was returned instead.
    return sum(token.strip(string.punctuation).isalpha() for token in ip.split())


## Divide a very large file in chunks for reading
def read_in_chunks(file_object, lines_per_chunk):
    """Yield successive lists of at most ``lines_per_chunk`` items.

    Args:
        file_object: An open file or any other iterable of lines.
        lines_per_chunk: Maximum number of items in each yielded list.

    Yields:
        Non-empty lists of consecutive items; the last one may be shorter.
    """
    # Obtain a single iterator up front.  For a file this is the file itself,
    # but for a re-iterable object such as a list, islice() would otherwise
    # restart from the first element on every pass and never finish.
    line_iter = iter(file_object)
    while True:
        lines = list(islice(line_iter, lines_per_chunk))
        if not lines:
            return
        yield lines


# Functions that should all receive the same line of text.
all_funcs = [count_chars, count_words]
# Flat list of results: for every line, one entry per function in all_funcs.
data = []


# Create the pool once and reuse it for every chunk instead of building a
# new pool per chunk.
with concurrent.futures.ThreadPoolExecutor(max_workers=3) as executor:
    with open(input_file_name) as f:
        for piece in read_in_chunks(f, 10):
            print(piece)
            for line in piece:
                # Bind the current line to each function so that every task
                # is a zero-argument callable.  partial(func) without an
                # argument binds nothing, and calling it with no argument
                # raises TypeError.
                tasks = [partial(func, line) for func in all_funcs]
                data.extend(executor.map(lambda task: task(), tasks))
            print(data)

第一:您可以在open()之前創建ThreadPoolExecutor並且您可以(重新)多次使用它而無需一次又一次地(重新)創建它。


map 是用不同的數據運行同一個函數,但你嘗試的是用相同的數據運行不同的函數。

我寧願創建一個同時運行 count_chars 和 count_words 的函數,並將此函數與 map() 和不同的行一起使用。

def func(line):
    """Return the pair ``(count_chars(line), count_words(line))``."""
    return count_chars(line), count_words(line)

# Collects one value returned by func() per input line, in file order.
results = []

# The pool is opened before the file so that it is reused for every chunk.
with concurrent.futures.ThreadPoolExecutor(max_workers=3) as executor, \
        open(input_file_name) as f:
    for piece in read_in_chunks(f, 10):
        # map() returns results in the order of its inputs.
        piece_results = list(executor.map(func, piece))
        results.extend(piece_results)

print(results)

它給出了由成對結果組成的列表

[(count_chars, count_words), (count_chars, count_words), ...] 

所以很容易得到單對。

最終我可以使用zip()創建包含所有count_chars的分隔列表和包含所有count_words的分隔列表


最小可運行代碼。

import string
import concurrent.futures
from itertools import islice
#from functools import partial


## Function to count characters in a string
def count_chars(line):
    """Return the length of ``line`` in characters."""
    n_chars = len(line)
    return n_chars


## Function to count words in a string
def count_words(line):
    """Count the tokens of ``line`` that are purely alphabetic.

    Tokens are split on whitespace, and leading/trailing punctuation is
    stripped before the alphabetic check.
    """
    total = 0
    for token in line.split():
        if token.strip(string.punctuation).isalpha():
            total += 1
    return total


## Divide a very large file in chunks for reading
def read_in_chunks(file_object, lines_per_chunk):
    """Yield lists of up to ``lines_per_chunk`` items taken from ``file_object``.

    Iteration stops as soon as a read produces no items.
    """
    chunk = list(islice(file_object, lines_per_chunk))
    while chunk:
        yield chunk
        # Fetch the next batch only after the consumer asks for more.
        chunk = list(islice(file_object, lines_per_chunk))


# Name of the text file whose lines are analysed.
input_file_name = 'pool-multifunctions.py'

# --- version 1 ----

def func(line):
    """Apply both counters to ``line``.

    Returns a 2-tuple: the result of ``count_chars`` followed by the
    result of ``count_words``.
    """
    return count_chars(line), count_words(line)

# One (count_chars, count_words) tuple per input line, in file order.
results = []

# A single pool is shared by all chunks of the file.
with concurrent.futures.ThreadPoolExecutor(max_workers=3) as executor:
    with open(input_file_name) as f:
        for piece in read_in_chunks(f, 10):
            # map() returns results in input order, so tuples line up with lines.
            results.extend(executor.map(func, piece))

print('--- results ---')
print(results)

# Transpose the list of pairs into two parallel sequences.  zip(*results)
# produces nothing when the file is empty, which would make the two-name
# unpacking raise ValueError, so fall back to two empty tuples.
if results:
    all_count_chars, all_count_words = zip(*results)
else:
    all_count_chars, all_count_words = (), ()

print('--- all_count_chars ---')
print(all_count_chars)
print('--- all_count_words ---')
print(all_count_words)

其他版本:

我創建對

 # Each tuple pairs a function with the argument it should be called with.
 all_pairs = [(count_chars, line), (count_words, line)]

並運行它們

 # x is one (function, argument) tuple: call x[0] with x[1].
 lambda x:x[0](x[1])

其中x[0]將是函數的名稱,而x[1]將是line

為此我不需要使用 partial

它給出了平面列表

[count_chars, count_words, count_chars, count_words, ...] 

所以要得到單對並不容易。

要創建包含所有 count_chars 的單獨列表和包含所有 count_words 的單獨列表,需要使用 results[0::2] 和 results[1::2]

import string
import concurrent.futures
from itertools import islice
#from functools import partial


## Function to count characters in a string
def count_chars(line):
    """Return how many characters ``line`` contains."""
    length = len(line)
    return length


## Function to count words in a string
def count_words(line):
    """Return the number of alphabetic tokens in ``line``.

    Each whitespace-separated token has surrounding punctuation stripped
    and is counted only if the remainder is entirely alphabetic.
    """
    stripped_tokens = [token.strip(string.punctuation) for token in line.split()]
    alphabetic = [token for token in stripped_tokens if token.isalpha()]
    return len(alphabetic)


## Divide a very large file in chunks for reading
def read_in_chunks(file_object, lines_per_chunk):
    """Generate lists of at most ``lines_per_chunk`` items from ``file_object``.

    The generator finishes when a read comes back empty.
    """
    def next_chunk():
        return list(islice(file_object, lines_per_chunk))

    # Two-argument iter() keeps calling next_chunk() until it returns the
    # sentinel, i.e. an empty list.
    for chunk in iter(next_chunk, []):
        yield chunk


# Name of the text file whose lines are analysed.
input_file_name = 'pool-multifunctions.py'

# --- version 2 ---

# Flat list of results: for each line, count_chars then count_words.
results = []


def _call_pair(pair):
    """Call the function in ``pair[0]`` with the argument in ``pair[1]``."""
    function, argument = pair
    return function(argument)


with concurrent.futures.ThreadPoolExecutor(max_workers=3) as executor, \
        open(input_file_name) as f:
    for piece in read_in_chunks(f, 10):
        for line in piece:
            # Pair each function with the line it should receive.
            all_pairs = [(count_chars, line), (count_words, line)]
            results.extend(list(executor.map(_call_pair, all_pairs)))

print('--- results ---')
print(results)

# Results alternate chars/words, so even indexes hold the character counts
# and odd indexes hold the word counts.
all_count_chars = results[::2]
all_count_words = results[1::2]

print('--- all_count_chars ---')
print(all_count_chars)
print('--- all_count_words ---')
print(all_count_words)

編輯:

我發現它可以用作

# map() takes one item from each iterable per call, so every function in the
# first list is called with the matching entry of the second list.
results += list(executor.map(lambda func,data:func(data), [count_chars, count_words], [line, line]))

或者

# The functions to apply to the same line.
all_funcs = [count_chars, count_words]
# Repeat the line once per function so map() can pair them up positionally.
all_data = [line] * len(all_funcs)

# Each call made by map() is func(data) for one (function, line) pairing.
results += list(executor.map(lambda func,data:func(data), all_funcs, all_data))

import string
import concurrent.futures
from itertools import islice
#from functools import partial


## Function to count characters in a string
def count_chars(line):
    """Return the character count of ``line``."""
    char_total = len(line)
    return char_total


## Function to count words in a string
def count_words(line):
    """Return how many tokens of ``line`` are alphabetic words.

    Punctuation at either end of a whitespace-separated token is ignored
    when deciding whether the token is alphabetic.
    """
    punctuation = string.punctuation
    return sum(1 for token in line.split() if token.strip(punctuation).isalpha())


## Divide a very large file in chunks for reading
def read_in_chunks(file_object, lines_per_chunk):
    """Yield consecutive batches of up to ``lines_per_chunk`` items.

    Each batch is a list read from ``file_object``; the generator returns
    once a batch comes back empty.
    """
    while True:
        batch = list(islice(file_object, lines_per_chunk))
        if not batch:
            return
        yield batch


# Name of the text file whose lines are analysed.
input_file_name = 'temp-pool-multifunctions.py'

# --- version 3 ---

# Flat list of results: for each line, one entry per function in all_funcs.
results = []
# Functions that are each applied to every line.
all_funcs = (count_chars, count_words)

with concurrent.futures.ThreadPoolExecutor(max_workers=3) as executor, \
        open(input_file_name) as f:
    for piece in read_in_chunks(f, 10):
        for line in piece:
            # One copy of the line per function, so map() can walk both
            # sequences in lockstep and call function(line) for each.
            all_data = [line for _ in all_funcs]
            line_results = executor.map(lambda fn, arg: fn(arg), all_funcs, all_data)
            results.extend(list(line_results))

print('--- results ---')
print(results)

# Results alternate chars/words, so even indexes hold the character counts
# and odd indexes hold the word counts.
all_count_chars = results[::2]
all_count_words = results[1::2]

print('--- all_count_chars ---')
print(all_count_chars)
print('--- all_count_words ---')
print(all_count_words)

暫無
暫無

聲明:本站的技術帖子網頁,遵循CC BY-SA 4.0協議,如果您需要轉載,請注明本站網址或者原文地址。任何問題請咨詢:yoyou2525@163.com.

 
粵ICP備18138465號  © 2020-2024 STACKOOM.COM