
How do I read a CSV file from S3 and write its contents to an RDS database table using a Python Lambda function?

I have a CSV file, Employee.csv, in an S3 bucket with information about employees: name, age, salary, designation. I have to write a Python Lambda function that reads this file and writes it to an RDS database, creating a table called Employee with columns name, age, salary, and designation, whose rows hold the data.

Employee.csv is just an example; in practice it can be any CSV file with any number of columns.
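Since the file can be any CSV, one option is to create the table on the fly from the CSV header row. Below is a minimal sketch of that idea; the create_table_from_header helper and the TEXT-only column types are assumptions for illustration, not code from either snippet on this page:

def create_table_from_header(cur, tableName, header):
    # Hypothetical helper: build a CREATE TABLE statement from the CSV header row.
    # Every column is declared TEXT for simplicity; adjust the types as needed.
    columns = ', '.join('`{}` TEXT'.format(col.strip()) for col in header)
    cur.execute('CREATE TABLE IF NOT EXISTS `{}` ({})'.format(tableName, columns))

# e.g. for Employee.csv the header row would be
# ['name', 'age', 'salary', 'designation']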

from __future__ import print_function
import boto3
import logging
import os
import sys
import uuid
import pymysql
import csv
import rds_config


rds_host  = rds_config.rds_host
name = rds_config.db_username
password = rds_config.db_password
db_name = rds_config.db_name


logger = logging.getLogger()
logger.setLevel(logging.INFO)

try:
    # pymysql expects the host as a keyword argument
    conn = pymysql.connect(host=rds_host, user=name, passwd=password, db=db_name, connect_timeout=5)
except Exception as e:
    logger.error("ERROR: Unexpected error: could not connect to the MySQL instance.")
    logger.error(e)
    sys.exit()

logger.info("SUCCESS: Connection to RDS MySQL instance succeeded")

s3_client = boto3.client('s3')

def handler(event, context):

    bucket = event['Records'][0]['s3']['bucket']['name']
    key = event['Records'][0]['s3']['object']['key'] 
    download_path = '/tmp/{}{}'.format(uuid.uuid4(), os.path.basename(key))  # basename in case the key has a folder prefix

    s3_client.download_file(bucket, key, download_path)

    csv_data = csv.reader(open(download_path, newline=''))  # file() is Python 2 only; use open() in Python 3

    with conn.cursor() as cur:
        for idx, row in enumerate(csv_data):

            logger.info(row)
            try:
                # Let pymysql quote the parameters; do not wrap the %s placeholders in quotes
                cur.execute('INSERT INTO target_table (name, age, salary, designation) '
                            'VALUES (%s, %s, %s, %s)', row)
            except Exception as e:
                logger.error(e)

            if idx % 100 == 0:
                conn.commit()

        conn.commit()

    return 'File loaded into RDS:' + str(download_path)
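The snippet imports an rds_config module that holds the connection settings. A minimal rds_config.py exposing the four fields referenced above could look like this (all values are placeholders):

# rds_config.py -- placeholder values only; in practice keep credentials out of
# source control, e.g. in Lambda environment variables or AWS Secrets Manager.
rds_host = "your-instance.xxxxxxxxxxxx.us-east-1.rds.amazonaws.com"
db_username = "your_user"
db_password = "your_password"
db_name = "your_db"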

Here is the code that is now working for me:

import sys
import csv
import boto3

s3 = boto3.resource('s3')

def handler(event, context):
    file_object = event['Records'][0]
    bucket = str(file_object['s3']['bucket']['name'])
    key = str(file_object['s3']['object']['key'])
    obj = s3.Object(bucket, key)

    content_lines = obj.get()['Body'].read().decode('utf-8').splitlines(True)

    # str.strip() removes a set of characters, not a substring, so
    # key.strip('folder/').strip('.csv') would mangle names like 'folder/records.csv';
    # derive the table name from the file name instead.
    tableName = key.rsplit('/', 1)[-1].rsplit('.csv', 1)[0]

    # conn is the module-level pymysql connection opened as in the snippet above
    with conn.cursor() as cur:
        try:
            cur.execute('TRUNCATE TABLE ' + tableName)
        except Exception as e:
            print("ERROR: Unexpected error: table does not exist.")
            sys.exit()
        header = True
        for row in csv.reader(content_lines):
            if header:
                # The first row holds the column names; build the INSERT template once
                columnNames = ', '.join(row)
                values = ', '.join(['%s'] * len(row))
                print("INSERT into " + tableName + "(" + columnNames + ") VALUES(" + values + ")")
                header = False
            else:
                cur.execute('INSERT into ' + tableName + '(' + columnNames + ') VALUES(' + values + ')', row)
        conn.commit()
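For a quick local test, the handler can be invoked with a minimal S3 put event containing only the fields the code actually reads; the bucket and key names here are made up:

event = {
    "Records": [
        {
            "s3": {
                "bucket": {"name": "my-bucket"},            # assumed bucket name
                "object": {"key": "folder/Employee.csv"}    # assumed key
            }
        }
    ]
}
handler(event, None)

With Employee.csv as input, the printed INSERT template would be: INSERT into Employee(name, age, salary, designation) VALUES(%s, %s, %s, %s).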
