How do I invoke and wait for 1,000 AWS Lambdas running in parallel from Python?

When I use the third-party aiobotocore library it works up to NUM_WORKERS=500, but if I want to go up to 1,000 I get this error:

    r, w, _ = self._select(self._readers, self._writers, [], timeout)
  File ".....\lib\selectors.py", line 314, in _select
    r, w, x = select.select(r, w, w, timeout)
ValueError: too many file descriptors in select()

Is there a way to execute 1,000 in parallel?


import os, sys, time, json
import asyncio
from itertools import chain
from typing import List
import logging
from functools import partial
from pprint import pprint 

# Third Party
import asyncpool
import aiobotocore.session
import aiobotocore.config


async def execute_lambda(lambda_name: str, key: str, client):
    """Invoke one AWS Lambda function and return its response payload.

    NOTE(review): the arguments of the ``invoke`` call and the body of the
    payload loop were missing from the snippet as posted. They are
    reconstructed from the Lambda handler (which reads ``event['exec_id']``)
    and from the sample output (a list of JSON strings) -- confirm against
    the real script.

    :param lambda_name: name of the Lambda function to invoke
    :param key: value sent to the function as ``exec_id``
    :param client: Lambda client whose ``invoke`` is awaitable
    :return: list with the decoded chunks of the response payload
    """
    response = await client.invoke(
        FunctionName=lambda_name,
        # Synchronous invocation: the caller needs the function's return value.
        InvocationType='RequestResponse',
        Payload=json.dumps({'exec_id': key}).encode('utf-8'),
    )
    # The payload is consumed as an async stream; presumably each chunk is
    # bytes (TODO confirm), so decode to get the JSON text.
    return [chunk.decode() async for chunk in response['Payload']]

async def submit(lambda_name: str) -> List[dict]:
    """
    Returns list of AWS Lambda outputs executed in parallel

    :param lambda_name: name of lambda function
    :return: list of lambda returns
    """
    logger = logging.getLogger()

    session = aiobotocore.session.AioSession()
    # Size the connection pool to the worker count so every worker can hold
    # its own connection.
    config = aiobotocore.config.AioConfig(max_pool_connections=_NUM_WORKERS)
    contents = []
    async with session.create_client('lambda', region_name='us-west-2', config=config) as client:
        # Pre-bind the function name; each pushed item supplies the
        # remaining (key, client) arguments of execute_lambda.
        worker_co = partial(execute_lambda, lambda_name)
        async with asyncpool.AsyncPool(None, _NUM_WORKERS, 'lambda_work_queue', logger, worker_co,
                                       return_futures=True, raise_on_join=True, log_every_n=10) as work_pool:
            for x in range(_NUM_WORKERS):
                contents.append(await work_pool.push(x, client))

    # retrieve results from futures
    contents = [c.result() for c in contents]
    # Each worker returns a list, so flatten them into one list.
    return list(chain.from_iterable(contents))

def main(name, files):
    """Run ``submit(name)`` on the default event loop and print the wall time.

    :param name: name of the Lambda function, forwarded to ``submit``
    :param files: not used by this function
    """
    started_at = time.perf_counter()
    event_loop = asyncio.get_event_loop()
    # Block until every invocation has finished; the result is not used here.
    event_loop.run_until_complete(submit(name))
    elapsed = time.perf_counter() - started_at
    print(f"{__file__} executed in {elapsed:0.2f} seconds.")

Lambda function:

import time
def lambda_handler(event, context):
    """Echo the caller's ``exec_id`` back together with a zero status code.

    :param event: invocation payload; must contain the key ``'exec_id'``
    :param context: Lambda runtime context (not used)
    :return: dict with ``'code'`` set to 0 and the received ``'exec_id'``
    """
    exec_id = event['exec_id']
    return dict(code=0, exec_id=exec_id)


 '{"code": 0, "exec_id": 0}',
 '{"code": 0, "exec_id": 1}',
 '{"code": 0, "exec_id": 2}',
 '{"code": 0, "exec_id": 3}',
 '{"code": 0, "exec_id": 496}',
 '{"code": 0, "exec_id": 497}',
 '{"code": 0, "exec_id": 498}',
 '{"code": 0, "exec_id": 499}']
my_cli_script.py executed in 14.56 seconds.

In response to the question asked in the comments here, this is the code I use to spin up 100 Lambda instances in parallel:

import boto3
import json
from concurrent.futures import ThreadPoolExecutor

# AWS credentials are exported as environment variables, so the region and
# account ID are picked up from there and no explicit configuration is
# passed to the client.
# Module-level Lambda client used by invoke_lambda().
lambda_ = boto3.client('lambda')

def invoke_lambda(payload, function_name='my-function'):
    """Synchronously invoke a Lambda function and return the ``body`` of its reply.

    NOTE(review): the arguments of the ``invoke`` call were missing from the
    snippet as posted. ``'my-function'`` is a placeholder -- replace it with
    the real function name (or pass ``function_name`` explicitly).

    :param payload: JSON-serialisable object; sent JSON-encoded under the
        ``'body'`` key of the request
    :param function_name: name of the Lambda function to invoke
    :return: value stored under ``'body'`` in the function's JSON response,
        or ``None`` when that key is absent
    """
    payload = {'body': json.dumps(payload)}

    response = lambda_.invoke(
        FunctionName=function_name,
        # I need to receive a response back from lambda
        # so I use sync invocation
        InvocationType='RequestResponse',
        Payload=json.dumps(payload).encode('utf-8'),
    )

    res_payload = response.get('Payload').read()
    body = json.loads(res_payload).get('body')
    return body

# Number of lambdas to spin up concurrently (one worker thread each).
MAX_WORKERS = 100

# `data` is a list of dicts; each element is the payload of one invocation.
with ThreadPoolExecutor(max_workers=MAX_WORKERS) as executor:
    # map() yields results in the same order as `data`.
    result = [*executor.map(invoke_lambda, data)]

Two last notes:

  1. a dozen milliseconds to spawn 100 concurrent lambdas was probably an exaggeration. For some reason, if I set a higher granularity in CloudWatch metrics it plots nothing, so I can't say for sure how long it took exactly. To tread safely, I would say within 2 seconds.
  2. this piece of code has only been run in my local environment so far. It's pretty vanilla so I don't see why it wouldn't work elsewhere (for example, another parent lambda), but as a word of warning, I haven't tested it online yet.

I found this post: python-asyncio-aiohttp-valueerror-too-many-file-descriptors-in-select-on-win

After the following change it started to work:

# 1000 is a soft concurrency limit

def main(name, files):
    """Run ``submit(name)`` to completion and print memory use and elapsed time.

    On Windows a ``ProactorEventLoop`` is used instead of the default loop:
    the selector-based loop is what raised "too many file descriptors in
    select()" in the traceback at the top of this post.

    NOTE(review): ``psutil`` is used here but no import for it is visible in
    the snippet -- make sure it is imported at module level.

    :param name: name of the Lambda function, forwarded to ``submit``
    :param files: not used by this function
    """
    s = time.perf_counter()
    if sys.platform == 'win32':
        _loop = asyncio.ProactorEventLoop()
        # Make it the current loop so code calling asyncio.get_event_loop()
        # sees the same loop.
        asyncio.set_event_loop(_loop)
    else:
        _loop = asyncio.get_event_loop()
    # Run exactly once, on whichever loop was selected above.
    _result = _loop.run_until_complete(submit(name))
    elapsed = time.perf_counter() - s
    process = psutil.Process(os.getpid())
    # rss is in bytes; dividing by 1024 reports KiB.
    print(f"{__file__}: memory[{process.memory_info().rss/1024:7,.2f}], elapsed {elapsed:0.2f} sec")


 '{"code": 0, "exec_id": 986}',
 '{"code": 0, "exec_id": 987}',
 '{"code": 0, "exec_id": 988}',
 '{"code": 0, "exec_id": 989}']
my_cli_script.py: memory[201,064.00], elapsed 16.53 sec


