[英]Validating and unzipping a zip file in AWS lambda
要求指出 lambda function 必须检查 zip 文件中是否有任何已在 function 中定义的排除文件扩展名。我已经概述了成功运行所需的步骤。
上述所有步骤都在发生,但我的代码中似乎出现了一个属性错误,如下所述。 非常感谢任何想法/解决方案。
import json
import zipfile
import os
import boto3
from urllib.parse import unquote_plus
import io
import re
import gzip
exclude_list = [".exe", ".scr", ".vbs", ".js", ".xml", "docm", ".xps"]
sns = boto3.client('sns' )
def read_nested_zip(tf, bucket, key, s3_client):
print(key)
print ("search for.zip:",re.search(r'\.zip', key, re.IGNORECASE))
## need to add exception handling
##if re.search(r'\.gzip$', key, re.IGNORECASE):
## print ('gzip file found')
## fil = gzip.GzipFile(tf, mode='rb')
if re.search(r'\.zip$', key, re.IGNORECASE):
print ('zip file found')
fil = zipfile.ZipFile(tf, "r").namelist()
else:
fil = ()
print ('no file found')
print (fil)
##with fil as zipf:
##try to narrow scope - run loop else exit
for file in fil:
print(file)
if re.search(r'(\.zip|)$', file, re.IGNORECASE):
childzip = io.BytesIO(fil.read(file))
read_nested_zip(childzip, bucket, key, s3_client)
else:
if any(x in file.lower() for x in exclude_list):
print("Binary, dont load")
print(file)
print(bucket)
print(key)
env = bucket.split('-')[2].upper()
# Copy the parent zip to a separate folder and remove it from the path
copy_source = {'Bucket': bucket, 'Key': key}
s3_client.copy_object(Bucket=bucket, CopySource=copy_source, Key='do_not_load_'+key)
s3_client.delete_object(Bucket = bucket, Key = key)
sns.publish(
TopicArn = 'ARN',
Subject = env + ': S3 upload warning: Non standard File encountered ',
Message = 'Non standard File encountered' + key + ' uploaded to bucket ' + bucket + ' The file has been moved to ' + 'do_not_load_'+key
)
else:
print("File in supported formats, can be loaded " + file)
#folder = re.sub(r"\/[^/]+$", "",key)
folder = "/".join(key.split("/", 2)[:2]) + "/unzipped"
print(folder)
print("Bucket is "+ bucket)
print("file to copy is "+ file)
buffer = io.BytesIO(fil.read(file))
s3_resource = boto3.resource('s3')
s3_resource.meta.client.upload_fileobj(buffer,Bucket=bucket,Key= folder + '/' + file)
s3_resource.Object(bucket, folder + '/' + file).wait_until_exists()
def lambda_handler(event, context):
print(event)
for record in event['Records']:
s3_client = boto3.client('s3')
key = unquote_plus(record['s3']['object']['key'])
print(key)
print (type(key))
size = record['s3']['object']['size']
bucket = record['s3']['bucket']['name']
obj = s3_client.get_object(Bucket=bucket, Key=key)
print(obj)
putObjects = []
with io.BytesIO(obj["Body"].read()) as tf:
# rewind the file
#tf.seek(0)
read_nested_zip(tf, bucket, key, s3_client)
错误代码[ERROR] AttributeError: 'list' object has no attribute 'read' Traceback (most recent call last): File "/var/task/lambda_function.py", line 85, in lambda_handler read_nested_zip(tf, bucket, key , s3_client) 文件“/var/task/lambda_function.py”,第 35 行,在 read_nested_zip childzip = io.BytesIO(fil.read())
我尝试过的事情:1。
childzip = io.BytesIO(fil.read(file))
#tried switching the childzip = io.BytesIO(fil.read()) #still failed
changed
childzip = io.BytesIO(fil)
[ERROR] AttributeError: module 'zipfile' has no attribute 'read'
Traceback (most recent call last):
File "/var/task/lambda_function.py", line 85, in lambda_handler
read_nested_zip(tf, bucket, key, s3_client)
File "/var/task/lambda_function.py", line 25, in read_nested_zip
fil = zipfile.read(tf, "r").namelist()
任何想法表示赞赏。 最好
只要 ZIP 文件不是太大,我建议将 ZIP 文件下载到 Lambda 函数的/tmp
文件夹,然后使用 zipfile 上下文管理器来简化对 ZIP 文件的访问。 或者,您可以 stream ZIP 文件,但可能仍使用上下文管理器。
请注意,我包含了专门从 ZIP 文件中读取文件字节内容的代码。 请参阅下面的bytes = myzip.read(name)
。
例如:
import json
import os
import zipfile
import boto3
from urllib.parse import unquote_plus
ZIP_NAME = "/tmp/local.zip"
EXCLUDE_LIST = [".exe", ".scr", ".vbs", ".js", ".xml", "docm", ".xps"]
s3 = boto3.client("s3")
def process_zip(bucket, key):
s3.download_file(bucket, key, ZIP_NAME)
with zipfile.ZipFile(ZIP_NAME, "r") as myzip:
namelist = myzip.namelist()
for name in namelist:
print("Zip contains:", name)
extensions = [os.path.splitext(name)[1] for name in namelist]
print("Extensions:", extensions)
if any(extension in EXCLUDE_LIST for extension in extensions):
print("Banned extensions present in:", extensions)
os.remove(ZIP_NAME)
return
for name in namelist:
print("Zip read:", name)
bytes = myzip.read(name)
# your code here ...
os.remove(ZIP_NAME)
def lambda_handler(event, context):
for record in event.get("Records", []):
key = unquote_plus(record["s3"]["object"]["key"])
bucket = record["s3"]["bucket"]["name"]
if os.path.splitext(key)[1] == ".zip":
process_zip(bucket, key)
return {"statusCode": 200, "body": json.dumps("OK")}
声明:本站的技术帖子网页,遵循CC BY-SA 4.0协议,如果您需要转载,请注明本站网址或者原文地址。任何问题请咨询:yoyou2525@163.com.