
Learning Python concurrency with partial

Hi, I'm trying to learn Python concurrency with ThreadPoolExecutor and wanted some help understanding how to pass the same data to multiple functions (each of which consumes that data) using partial and map. I put together a quick example below. I know this could be done with just one function instead of two, but I want to understand it conceptually, so I'm trying my hand at a quick example.

import os
import threading
import timeit
from itertools import islice
import concurrent.futures
import string
from functools import partial

path = "C:/Users/some_folder"

input_file_name = os.path.join(path, "input_file_example.txt")    ## A very large file

## Function to count characters in a string
def count_chars(ip):
    count = len(ip)
    return count


## Function to count words in a string
def count_words(ip):
    count = sum([i.strip(string.punctuation).isalpha() for i in ip.split()])
    return count


## Divide a very large file in chunks for reading
def read_in_chunks(file_object, lines_per_chunk):
    while True:
        lines = list(islice(file_object, lines_per_chunk))
        if lines:
            yield lines
        else:
            break


all_funcs = [partial(count_chars), partial(count_words)]
data = []


with open(input_file_name) as f:    
    for piece in read_in_chunks(f, 10):
        print(piece)
        with concurrent.futures.ThreadPoolExecutor(max_workers=3) as executor:
            for line in piece:
                for result in executor.map(lambda x: x(), all_funcs):
                    data.append(result)
            print(data)

First: you can create the ThreadPoolExecutor before open(), and you can reuse it many times without recreating it again and again.
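For example (a minimal sketch with made-up data, using submit() just to keep it short), the pool below is created once and then reused for several tasks:

import concurrent.futures

# One pool, created up front (before any files are opened) and reused for every task.
with concurrent.futures.ThreadPoolExecutor(max_workers=3) as executor:
    first = executor.submit(len, "hello").result()    # first use of the pool
    second = executor.submit(len, "world!").result()  # same pool, reused

print(first, second)  # 5 6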


map is rather meant to run one function with different data, but you are trying to run different functions with the same data.
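For example, this (a minimal sketch with made-up data) is the pattern map() is designed for: one callable applied to many different inputs.

import concurrent.futures

lines = ["first line", "second line", "third"]

with concurrent.futures.ThreadPoolExecutor(max_workers=3) as executor:
    # one function (len), many different pieces of data
    lengths = list(executor.map(len, lines))

print(lengths)  # [10, 11, 5]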

I would rather create one function which runs both count_chars and count_words, and use this function with map() and the different lines.

def func(line):
    a = count_chars(line)
    b = count_words(line)
    return a,b

results = []

with concurrent.futures.ThreadPoolExecutor(max_workers=3) as executor:
    with open(input_file_name) as f:    
        for piece in read_in_chunks(f, 10):
            #print(piece)
            results += list(executor.map(func, list(piece)))

print(results)

And it gives a list of pairs

[(count_chars, count_words), (count_chars, count_words), ...] 

so it is easy to get a single pair.

Eventually I could use zip() to create a separate list with all the count_chars results and a separate list with all the count_words results.


Minimal working code.

import string
import concurrent.futures
from itertools import islice
#from functools import partial


## Function to count characters in a string
def count_chars(line):
    return len(line)


## Function to count words in a string
def count_words(line):
    return sum(word.strip(string.punctuation).isalpha() for word in line.split())


## Divide a very large file in chunks for reading
def read_in_chunks(file_object, lines_per_chunk):
    while True:
        lines = list(islice(file_object, lines_per_chunk))
        if lines:
            yield lines
        else:
            break


input_file_name = 'pool-multifunctions.py'

# --- version 1 ----

def func(line):
    a = count_chars(line)
    b = count_words(line)
    return a,b

results = []

with concurrent.futures.ThreadPoolExecutor(max_workers=3) as executor:
    with open(input_file_name) as f:    
        for piece in read_in_chunks(f, 10):
            #print(piece)
            results += list(executor.map(func, list(piece)))

print('--- results ---')
print(results)

all_count_chars, all_count_words = zip(*results)

print('--- all_count_chars ---')
print(all_count_chars)
print('--- all_count_words ---')
print(all_count_words)

Other version:

I create pairs

 all_pairs = [(count_chars, line), (count_words, line)]

and run them with

 lambda x:x[0](x[1])

where x[0] will be the function and x[1] will be the line.

And I don't need partial for this.
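If you did want to keep partial as in the question, one option (just a sketch, not needed for the versions below) is to bind the line into each function first, so map() only has to call zero-argument callables:

import string
import concurrent.futures
from functools import partial

def count_chars(line):
    return len(line)

def count_words(line):
    return sum(word.strip(string.punctuation).isalpha() for word in line.split())

line = "An example line of text."   # stand-in for one line read from the file

with concurrent.futures.ThreadPoolExecutor(max_workers=3) as executor:
    # partial() freezes `line` inside each callable, so map() only has to call them
    bound = [partial(count_chars, line), partial(count_words, line)]
    chars, words = executor.map(lambda f: f(), bound)

print(chars, words)  # 24 5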

This version gives a flat list

[count_chars, count_words, count_chars, count_words, ...] 

so it is not as easy to get a single pair.

To create a separate list with all the count_chars results and a separate list with all the count_words results, it needs results[0::2] and results[1::2].

import string
import concurrent.futures
from itertools import islice
#from functools import partial


## Function to count characters in a string
def count_chars(line):
    return len(line)


## Function to count words in a string
def count_words(line):
    return sum(word.strip(string.punctuation).isalpha() for word in line.split())


## Divide a very large file in chunks for reading
def read_in_chunks(file_object, lines_per_chunk):
    while True:
        lines = list(islice(file_object, lines_per_chunk))
        if lines:
            yield lines
        else:
            break


input_file_name = 'pool-multifunctions.py'

# --- version 2 ---

results = []

with concurrent.futures.ThreadPoolExecutor(max_workers=3) as executor:
    with open(input_file_name) as f:    
        for piece in read_in_chunks(f, 10):
            for line in piece:
                all_pairs = [(count_chars, line), (count_words, line)]
                results += list(executor.map(lambda x:x[0](x[1]), all_pairs))

print('--- results ---')
print(results)

all_count_chars = results[0::2]
all_count_words = results[1::2]

print('--- all_count_chars ---')
print(all_count_chars)
print('--- all_count_words ---')
print(all_count_words)

EDIT:

I found out it can be used as

results += list(executor.map(lambda func,data:func(data), [count_chars, count_words], [line, line]))

or

all_funcs = [count_chars, count_words]
all_data = [line] * len(all_funcs)

results += list(executor.map(lambda func,data:func(data), all_funcs, all_data))

import string
import concurrent.futures
from itertools import islice
#from functools import partial


## Function to count characters in a string
def count_chars(line):
    return len(line)


## Function to count words in a string
def count_words(line):
    return sum(word.strip(string.punctuation).isalpha() for word in line.split())


## Divide a very large file in chunks for reading
def read_in_chunks(file_object, lines_per_chunk):
    while True:
        lines = list(islice(file_object, lines_per_chunk))
        if lines:
            yield lines
        else:
            break


input_file_name = 'temp-pool-multifunctions.py'

# --- version 3 ---

results = []
all_funcs = (count_chars, count_words)

with concurrent.futures.ThreadPoolExecutor(max_workers=3) as executor:
    with open(input_file_name) as f:    
        for piece in read_in_chunks(f, 10):
            for line in piece:
                #results += list(executor.map(lambda func,data:func(data), [count_chars, count_words], [line, line]))
                all_data = [line] * len(all_funcs)
                results += list(executor.map(lambda func,data:func(data), all_funcs, all_data))

print('--- results ---')
print(results)

all_count_chars = results[0::2]
all_count_words = results[1::2]

print('--- all_count_chars ---')
print(all_count_chars)
print('--- all_count_words ---')
print(all_count_words)
