How can I pass an additional argument to the function that executor.map calls? Here is an example:
from concurrent.futures import ProcessPoolExecutor
from dask.dataframe import read_csv
def apply(row_of_small_df):
    """Process one item taken from ``small_df.iterrows()``.

    Only the row is received; the large dataframe is not reachable from
    inside this function, which is the problem the question describes.
    """
    # Here there is no access to big_df
    return None
def main():
    """Load both dataframes and map ``apply`` over the rows of the small one."""
    small_df = read_csv('...')
    # big_df is loaded here but is never handed to apply(); getting it there
    # is what the question is about.
    big_df = read_csv('...')
    with ProcessPoolExecutor() as executor:
        results = executor.map(apply, small_df.iterrows())
        # Iterating the results is what surfaces each call's return value.
        for result in results:
            pass
# Run main() only when executed as a script, not when the module is imported
# (worker processes may import this module on some platforms).
if __name__ == '__main__':
    main()
Another alternative is to use functools.partial:
Return a new partial object which when called will behave like func called with the positional arguments args and keyword arguments keywords. If more arguments are supplied to the call, they are appended to args. If additional keyword arguments are supplied, they extend and override keywords.
from functools import partial
def apply(big_df, row_of_small_df):
    """Process one row of the small dataframe with ``big_df`` available.

    ``big_df`` comes first so that it can be pre-bound with
    ``functools.partial``, leaving the row as the only remaining argument.
    """
    # requires big_df to be passed in
    return None
def main():
    """Load both dataframes and map ``apply`` over the small one's rows.

    ``big_df`` is bound to ``apply`` with ``functools.partial`` so that every
    call made by ``executor.map`` receives it alongside the row.
    """
    small_df = read_csv('...')
    big_df = read_csv('...')
    # Fix big_df as the first positional argument; executor.map then only has
    # to supply the row.
    apply_with_big_df = partial(apply, big_df)
    with ProcessPoolExecutor() as executor:
        results = executor.map(apply_with_big_df, small_df.iterrows())
        for result in results:
            pass
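Use a lambda (note: this only works with a thread-based executor such as ThreadPoolExecutor; ProcessPoolExecutor has to pickle the callable, and lambdas cannot be pickled):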
#...
def apply(big_df, row_of_small_df):
    """Placeholder worker taking the big dataframe and one small-frame row."""
#...
# The lambda closes over big_df and forwards each row to apply().
# NOTE(review): lambdas cannot be pickled, so this presumably requires a
# thread-based executor (e.g. ThreadPoolExecutor); with ProcessPoolExecutor a
# picklable callable such as functools.partial(apply, big_df) is needed.
results = executor.map(lambda row_of_small: apply(big_df, row_of_small), small_df.iterrows())
#...
Use the zip function to bundle the arguments for each call into a single tuple, like so:
from itertools import repeat

# Pair the whole big_df with every row of small_df. Zipping
# big_df.iterrows() instead would give each call a single row of big_df and
# stop at the shorter of the two frames.
results = executor.map(apply, zip(repeat(big_df), small_df.iterrows()))
The function should now be
def apply(params):
    """Handle one work item delivered by ``executor.map``.

    ``params`` is a 2-tuple built by ``zip``; it is unpacked into its ``big``
    and ``small`` parts. Unpacking raises ``ValueError`` if ``params`` does
    not contain exactly two elements.
    """
    big, small = params
    # your code
The technical posts on this site are licensed under CC BY-SA 4.0. If you reprint them, please cite the site URL or the original address. For any questions, please contact: yoyou2525@163.com.