When I use the third-party aiobotocore it works up to NUM_WORKERS=500, but if I try to go up to 1000 I get this error:
r, w, _ = self._select(self._readers, self._writers, [], timeout)
File ".....\lib\selectors.py", line 314, in _select
r, w, x = select.select(r, w, w, timeout)
ValueError: too many file descriptors in select()
Is there a way to execute 1000 in parallel?
Source:
import os, sys, time, json
import asyncio
from itertools import chain
from typing import List
import logging
from functools import partial
from pprint import pprint
# Third Party
import asyncpool
import aiobotocore.session
import aiobotocore.config
_NUM_WORKERS=500  # number of concurrent Lambda invocations; also sizes the client connection pool
async def execute_lambda(lambda_name: str, key: str, client) -> List[str]:
    """Invoke one AWS Lambda synchronously and collect its streamed payload.

    :param lambda_name: name of the Lambda function to invoke
    :param key: execution id, forwarded to the function as ``exec_id``
    :param client: an aiobotocore Lambda client (created by the caller)
    :return: list of decoded payload chunks returned by the function
    """
    # Removed the dead `if 1:` wrapper and the redundant `name = lambda_name`
    # alias; behavior is otherwise unchanged.
    response = await client.invoke(
        InvocationType='RequestResponse',
        FunctionName=lambda_name,
        LogType='Tail',
        Payload=json.dumps({
            'exec_id': key,
        })
    )
    # The payload arrives as an async stream of bytes chunks; decode each one.
    out = []
    async for event in response['Payload']:
        out.append(event.decode())
    return out
async def submit(lambda_name: str) -> List[dict]:
    """
    Returns list of AWS Lambda outputs executed in parallel
    :param lambda_name: name of the lambda function to invoke _NUM_WORKERS times
    :return: flattened list of all lambda outputs
    """
    logging.basicConfig(level=logging.INFO)
    logger = logging.getLogger()
    session = aiobotocore.session.AioSession()
    # One pooled connection per worker so every invocation can be in flight at once.
    config = aiobotocore.config.AioConfig(max_pool_connections=_NUM_WORKERS)
    contents = []
    #client = boto3.client('lambda', region_name='us-west-2')
    async with session.create_client('lambda', region_name='us-west-2', config=config) as client:
        # Bind lambda_name now; the pool appends (x, client) per pushed work item.
        worker_co = partial(execute_lambda, lambda_name)
        async with asyncpool.AsyncPool(None, _NUM_WORKERS, 'lambda_work_queue', logger, worker_co,
                return_futures=True, raise_on_join=True, log_every_n=10) as work_pool:
            # push() returns a future per work item; they resolve once the pool joins.
            for x in range(_NUM_WORKERS):
                contents.append(await work_pool.push(x, client))
        # retrieve results from futures
        contents = [c.result() for c in contents]
    # Each worker returns a list of payload chunks; flatten them into one list.
    return list(chain.from_iterable(contents))
def main(name, files):
    """Drive the async submit() pipeline to completion and report wall-clock time.

    :param name: Lambda function name, forwarded to submit()
    :param files: unused here; kept for interface compatibility
    """
    started = time.perf_counter()
    loop = asyncio.get_event_loop()
    outputs = loop.run_until_complete(submit(name))
    pprint(outputs)
    print(f"{__file__} executed in {time.perf_counter() - started:0.2f} seconds.")
Lambda function:
import time
def lambda_handler(event, context):
    """Simulate 10 seconds of work, then echo back the caller-supplied exec_id."""
    time.sleep(10)
    return {'code':0, 'exec_id':event['exec_id']}
Result:
'{"code": 0, "exec_id": 0}',
'{"code": 0, "exec_id": 1}',
'{"code": 0, "exec_id": 2}',
'{"code": 0, "exec_id": 3}',
...
'{"code": 0, "exec_id": 496}',
'{"code": 0, "exec_id": 497}',
'{"code": 0, "exec_id": 498}',
'{"code": 0, "exec_id": 499}']
my_cli_script.py executed in 14.56 seconds.
In response to the question asked in the comments here, here is the code I use to spin up 100 lambda instances in parallel:
import boto3
import json
from concurrent.futures import ThreadPoolExecutor
# AWS credentials are exported in my env variables
# so region and account-id are fetched from there
lambda_ = boto3.client('lambda')  # module-level client shared by all worker threads below
def invoke_lambda(payload):
    """Synchronously invoke 'my-func' with *payload* and return its 'body' field."""
    wrapped = {'body': json.dumps(payload)}
    # I need to receive a response back from lambda
    # so I use sync invocation
    response = lambda_.invoke(
        FunctionName='my-func',
        InvocationType='RequestResponse',
        LogType='Tail',
        Payload=json.dumps(wrapped)
    )
    raw = response.get('Payload').read()
    return json.loads(raw).get('body')
MAX_WORKERS = 100 # how many lambdas you want to spin up concurrently
# NOTE(review): `data` is not defined in this excerpt — it must exist before this
# point; per the comment below it is a list of dicts, one payload per invocation.
with ThreadPoolExecutor(max_workers=MAX_WORKERS) as executor:
    result = list(executor.map(invoke_lambda, data))
    # data is a list of dicts, each element is a single "payload"
Two last notes:
I found this post: python-asyncio-aiohttp-valueerror-too-many-file-descriptors-in-select-on-win
After that change it started to work:
# 1000 is a soft concurrency limit
_NUM_WORKERS=990  # stay just under the 1000-execution limit mentioned above
def main(name, files):
    """Run submit() on a platform-appropriate event loop and report stats.

    On Windows the default selector loop is limited by select()'s file
    descriptor cap (the ValueError above); the proactor (IOCP) loop lifts it.

    :param name: Lambda function name, forwarded to submit()
    :param files: unused here; kept for interface compatibility
    """
    # BUG FIX: `elapsed` was printed below but never computed — the start
    # timestamp and the subtraction were missing entirely.
    s = time.perf_counter()
    if sys.platform == 'win32':
        _loop = asyncio.ProactorEventLoop()
        asyncio.set_event_loop(_loop)
    else:
        _loop = asyncio.get_event_loop()
    # Single call site instead of duplicating run_until_complete per branch.
    _result = _loop.run_until_complete(submit(name))
    elapsed = time.perf_counter() - s
    process = psutil.Process(os.getpid())
    print(f"{__file__}: memory[{process.memory_info().rss/1024:7,.2f}], elapsed {elapsed:0.2f} sec")
Result:
...
'{"code": 0, "exec_id": 986}',
'{"code": 0, "exec_id": 987}',
'{"code": 0, "exec_id": 988}',
'{"code": 0, "exec_id": 989}']
my_cli_script.py: memory[201,064.00], elapsed 16.53 sec
The technical post webpages of this site follow the CC BY-SA 4.0 protocol. If you need to reprint, please indicate the site URL or the original address. For any questions, please contact: yoyou2525@163.com.