简体   繁体   中英

Export DynamoDB to CSV on S3 with Lambda function (python)

Hello im trying to generate a CSV from dynamoDB to S3 using lambda function. the thing is I just get an empty file on s3. Please your help!

import csv
import boto3
import json
dynamodb = boto3.resource('dynamodb')
db = dynamodb.Table('ReporteTelefonica')
def lambda_handler(event, context):
    AWS_BUCKET_NAME = 'reportetelefonica'
    s3 = boto3.resource('s3')
    bucket = s3.Bucket(AWS_BUCKET_NAME)
    path = 'test.csv'
    try:
        response = db.scan()
        myFile = open(path, 'w')  

        for i in response['Items']:
            csv.register_dialect('myDialect', delimiter=',', quoting=csv.QUOTE_NONE)
            with myFile:
                writer = csv.writer(myFile, dialect='myDialect')
                writer.writerows(i)
            print(i)
    except :
        print("error")

    bucket.put_object(
        ACL='public-read-write',
        ContentType='application/csv',
        Key=path,
        # Body=json.dumps(i),
    )
    # print("here")
    body = {
        "uploaded": "true",
        "bucket": AWS_BUCKET_NAME,
        "path": path,
    }
    # print("then here")
    return {
        "statusCode": 200,
        "body": json.dumps(body)
    }

I'm kind of noob on this, so I was wondering what should I modify to successfully make a complete scan of the table and write the values on the CSV on S3???

You either have to close the file after you finished writing the cvs records and then reopen for reading and pass to the put_obkect method. Alternatively you open the file for reading and writing and after writing you seek to position 0 so that the put_object method reads from the start.

Here's a working lambda that will do the job.

import boto3
import json
import os
import pandas as pd

TABLE_NAME = os.environ.get("DDB_TABLE_NAME")
OUTPUT_BUCKET = os.environ.get("BUCKET_NAME")
TEMP_FILENAME = '/tmp/export.csv'
OUTPUT_KEY = 'export.csv'

s3_resource = boto3.resource('s3')
dynamodb_resource = boto3.resource('dynamodb')
table = dynamodb_resource.Table(TABLE_NAME)


def lambda_handler(event, context):
    response = table.scan()
    df = pd.DataFrame(response['Items'])
    df.to_csv(TEMP_FILENAME, index=False, header=True)

    # Upload temp file to S3
    s3_resource.Bucket(OUTPUT_BUCKET).upload_file(TEMP_FILENAME, OUTPUT_KEY)

    return {
        'statusCode': 200,
        'headers': {
            "Access-Control-Allow-Origin": "*",
            "Access-Control-Allow-Credentials": True,
            "content-type": "application/json"
        },
        'body': json.dumps('OK')
    }

The technical post webpages of this site follow the CC BY-SA 4.0 protocol. If you need to reprint, please indicate the site URL or the original address.Any question please contact:yoyou2525@163.com.

 
粤ICP备18138465号  © 2020-2024 STACKOOM.COM