簡體   English   中英

Python 3.6 os.environ變量的Lambda代碼錯誤

[英]Python 3.6 Lambda code error for os.environ variable

try :
        sf =                       Salesforce(username = sfdc_username, 
                                   password = sfdc_password, 
                                   security_token = sfdc_security_token, 
                                   instance_url = sfdc_salesforce_instance_url, 
                                   domain = sfdc_sandbox)
        print('salesforce login good')
    except (SalesforceGeneralError, 
            SalesforceMoreThanOneRecord, 
            SalesforceMalformedRequest, 
            SalesforceExpiredSession, 
            SalesforceRefusedRequest, 
            SalesforceResourceNotFound) as e :
        print(e.content[0]['message'])
        sys.exit(1)

lambda上的這部分代碼失敗並出現錯誤:

a bytes-like object is required, not 'str': TypeError
Traceback (most recent call last):
  File "/var/task/sfdc_etl/bin/sfdc_etl.py", line 80, in lambda_handler
    domain = sfdc_sandbox)
  File "/var/task/sfdc_etl/lib/python3.6/site-packages/simple_salesforce/api.py", line 146, in __init__
    domain=self.domain)
  File "/var/task/sfdc_etl/lib/python3.6/site-packages/simple_salesforce/login.py", line 80, in SalesforceLogin
    username = escape(username)
  File "/var/lang/lib/python3.6/html/__init__.py", line 19, in escape
    s = s.replace("&", "&") # Must be done first!
TypeError: a bytes-like object is required, not 'str'

當我在EC2 amazon linux上將此代碼移動到我的測試環境並將sfdc_sandox設置為'test'時,它沒有任何問題。 我嘗試使用os.environb [“L_SFDC_SANDBOX”]和os.environ [“L_SFDC_SANDBOX”] .codes('utf8'),但這也沒有幫助,因為它給出了同樣的錯誤。 當我在Lambda中引入此變量時,如何修復類型錯誤?

這是整個腳本,也許錯誤不是因為特定的代碼片段,即使它看起來像是。

import os
import sys

# this adds the parent directory of bin so we can find the  module
parent_dir = os.path.abspath(os.path.join(os.path.dirname(__file__), os.path.pardir))
sys.path.append(parent_dir)
#This addes venv lib/python2.7/site-packages/ to the search path
mod_path = os.path.abspath(parent_dir+"/lib/python"+str(sys.version_info[0])+"."+str(sys.version_info[1])+"/site-packages/")
sys.path.append(mod_path)

from awsmgr.awsmgr import S3Helper
from base64 import b64decode
import boto3
from collections import OrderedDict
import datetime
from dateutil.parser import parse
import logging
import json
import math
import pandas as pd
from simple_salesforce import Salesforce, SalesforceLogin
from simple_salesforce.exceptions import SalesforceGeneralError, SalesforceMoreThanOneRecord, SalesforceMalformedRequest, SalesforceExpiredSession, SalesforceRefusedRequest, SalesforceResourceNotFound
from sqlalchemy import create_engine
from sqlalchemy import exc

current_path = os.path.dirname(os.path.realpath(__file__))
# Use this one for the parent directory
ENV_ROOT =  os.path.abspath(os.path.join(current_path, os.path.pardir))
# Use this one for the current directory
#ENV_ROOT =  os.path.abspath(os.path.join(current_path))
sys.path.append(ENV_ROOT)

def lambda_handler(event, context):

    ###############################
    # Global Variable Definitions #
    ###############################
    d_parse =                       parse
    TMP_PATH =                      '/tmp'
    igersUser =                     'admin'
    igersPwd =                      boto3.client('kms').decrypt(CiphertextBlob=b64decode(os.environ["RS_PASSWORD"]))['Plaintext']
    igersHost =                     os.environ["RS_HOST"]
    igers =                         create_engine('postgres://{}:{}@{}/ibdrs'.format(igersUser, igersPwd, igersHost), encoding="utf-8")
    igersSchema =                   os.environ["RS_SCHEMA"]
    s3 =                            S3Helper(debug=os.environ["DEBUG"])
    nextObjFile =                   s3.get_s3_file('s3://test-sfdc-sds-team/sfdc-etl-jp-test/sfdc_etl/objects/next_object.txt',os.path.abspath(os.path.join(TMP_PATH,'next_object.txt')))
    s3Destination =                 's3://test-sfdc-sds-team/sfdc-etl-jp-test/sfdc_etl/json/'
    s3Path =                        '{}_json'
    s3NextObjDestination =          's3://test-sfdc-sds-team/sfdc-etl-jp-test/sfdc_etl/objects/{}'
    fileCount =                     1
    sfdc_username =                 os.environ["L_USERNAME"].encode('utf8')
    sfdc_salesforce_instance_url =  os.environ["L_SALESFORCE_INSTANCE_URL"].encode('utf8')    
    sfdc_password =                 boto3.client('kms').decrypt(CiphertextBlob=b64decode(os.environ["L_PASSWORD"]))['Plaintext']
    sfdc_security_token =           boto3.client('kms').decrypt(CiphertextBlob=b64decode(os.environ["L_SECURITY_TOKEN"]))['Plaintext']
    sfdc_sandbox        =           os.environ["L_SFDC_SANDBOX"].encode('utf8')

    print(type(sfdc_username), type(sfdc_password), type(sfdc_security_token), type(sfdc_salesforce_instance_url), type(sfdc_sandbox))
    try :
        sf =                       Salesforce(username = sfdc_username, 
                                   password = sfdc_password, 
                                   security_token = sfdc_security_token, 
                                   instance_url = sfdc_salesforce_instance_url, 
                                   domain = sfdc_sandbox)
        print('salesforce login good')
    except (SalesforceGeneralError, 
            SalesforceMoreThanOneRecord, 
            SalesforceMalformedRequest, 
            SalesforceExpiredSession, 
            SalesforceRefusedRequest, 
            SalesforceResourceNotFound) as e :
        print(e.content[0]['message'])
        sys.exit(1)

    # get nextobj from s3 
    with open(nextObjFile, 'r') as f :
        nextObjItem = f.read().strip().lower()
        nextObj = nextObjItem.lower()

    print('Processing {}'.format(nextObj))

    ######################################################
    # get rs table group permissions, store in dataframe #
    ######################################################
    def rsGetGroupPerms(igers, nextObj) : 
        global groupPerms
        groupPerms = {}
        existingGroupPerms = '''
        SELECT
            namespace, item, type, groname 
        FROM
            (
            SELECT
                use.usename AS subject,
                nsp.nspname AS NAMESPACE,
                cls.relname AS item,
                cls.relkind AS TYPE,
                use2.usename AS OWNER,
                cls.relacl 
            FROM
                pg_user use
                CROSS JOIN pg_class cls
                LEFT JOIN pg_namespace nsp ON cls.relnamespace = nsp.oid
                LEFT JOIN pg_user use2 ON cls.relowner = use2.usesysid 
            WHERE
                cls.relowner = use.usesysid 
                AND nsp.nspname NOT IN ( 'pg_catalog', 'pg_toast', 'information_schema' ) 
                AND nsp.nspname IN ( 'salesforce' ) 
                AND relacl IS NOT NULL 
            ORDER BY
                subject,
                NAMESPACE,
                item 
            )
            JOIN pg_group pu ON array_to_string( relacl, '|' ) LIKE'%%' || pu.groname || '%%'
            WHERE item = '{}'
        '''.format(nextObj)

        groupPerms = pd.read_sql(existingGroupPerms, igers)
        print('got the group permissions')
        return groupPerms

    #####################################################
    # get rs table user permissions, store in dataframe #
    #               NOT CURRENTLY IN USE                #
    #####################################################
    #def rsGetUseerPerms(igers, nextObj) :
    #    existingUserPerms = '''
    #    SELECT * 
    #    FROM 
    #    (
    #    SELECT 
    #        schemaname
    #        ,objectname
    #        ,usename
    #        ,HAS_TABLE_PRIVILEGE(usrs.usename, fullobj, 'select') AND has_schema_privilege(usrs.usename, schemaname, 'usage')  AS sel
    #        ,HAS_TABLE_PRIVILEGE(usrs.usename, fullobj, 'insert') AND has_schema_privilege(usrs.usename, schemaname, 'usage')  AS ins
    #        ,HAS_TABLE_PRIVILEGE(usrs.usename, fullobj, 'update') AND has_schema_privilege(usrs.usename, schemaname, 'usage')  AS upd
    #        ,HAS_TABLE_PRIVILEGE(usrs.usename, fullobj, 'delete') AND has_schema_privilege(usrs.usename, schemaname, 'usage')  AS del
    #        ,HAS_TABLE_PRIVILEGE(usrs.usename, fullobj, 'references') AND has_schema_privilege(usrs.usename, schemaname, 'usage')  AS ref
    #    FROM
    #        (
    #        SELECT schemaname, 't' AS obj_type, tablename AS objectname, schemaname + '.' + tablename AS fullobj FROM pg_tables
    #        UNION
    #        SELECT schemaname, 'v' AS obj_type, viewname AS objectname, schemaname + '.' + viewname AS fullobj FROM pg_views
    #        ) AS objs
    #        ,(SELECT * FROM pg_user) AS usrs
    #    ORDER BY fullobj
    #    )
    #    WHERE (sel = true or ins = true or upd = true or del = true or ref = true)
    #    and objectname = '{}'
    #    '''.format(nextObj)
    #    
    #    userPerms = pd.read_sql_query(existingUserPerms, igers)
    #    return userPerms

    ####################################################
    # Connect to Salesforce, Query JSON, and Copy to S3#
    ####################################################
    def sfToS3(fileCount, sf, nextObj) :
        # Initiate list for returned data
        pulls = []

        # Pull initial Query
        sfobject = sf.restful('sobjects/{}/describe/'.format(nextObj), params=None)        
        fields_list = [record['name'] for record in sfobject['fields']]
        initialQuery = sf.query("SELECT {} FROM {}".format(','.join(fields_list),nextObj))
        #Send a single file or the first file to S3
        data = initialQuery['records']
        try :
            send_temp_jsonl_to_s3(data, nextObj, s3, s3Destination, fileCount, s3Path)

            # Append initial query data to pulls
            if 'nextRecordsUrl' in initialQuery :
                pulls.append(initialQuery['nextRecordsUrl'])
                nextChunk = initialQuery['nextRecordsUrl']
                nextQuery = sf.query_more(nextChunk,True)
                if 'nextRecordsUrl' in nextQuery :
                    pulls.append(nextQuery['nextRecordsUrl'])
                    x = True         
                    fileCount = 2

                    while x == True:
                        try:                       
                            # set up while loop to re-query salesforce until returned
                            # query does not have a 'nextRecordsUrl' return value
                            # Query new 'nextRecordsUrl'
                            nextQuery = sf.query_more(nextQuery['nextRecordsUrl'],True)

                            # append new query to pulls
                            pulls.append(nextQuery['nextRecordsUrl'])
                        except: # This triggers when nextQuery['nextRecordsUrl'] does not exist
                            # set x to False to end loop
                            x = False 

            #if there was a follow on set of records, query it and add to S3      
            if len(pulls) >= 1 :
                for i in range(len(pulls)) :
                    data = sf.query_more(str(pulls[i].split('/')[5]))['records']

                    send_temp_jsonl_to_s3(data, nextObj, s3, s3Destination, fileCount, s3Path)

                    fileCount += 1
            print('completed sending JSON files to S3')
        except :
            print('Salesforce Object Empty, ending execution')
            updateNextObj(nextObj, s3NextObjDestination)
            sys.exit(1)

    ####################
    #   JSONL to S3    #
    ####################
    def send_temp_jsonl_to_s3(data, nextObj, s3, s3Destination, fileCount, s3Path) :
        fileName = '{}_file{}.json'
        localFilePath ='/tmp/'    

        for element in data :

            item = data.pop()
            item.pop('attributes', None)

            tempdict = OrderedDict({})
            for k,v in item.items() :
                if 'date' in k.lower() or 'stamp' in k.lower() :
                    if not v is None :
                        d = d_parse(v)
                        v = d.strftime('%Y-%m-%d %I:%M:%S')
                        tempdict[k.lower()] = v
                else :
                    tempdict[k.lower()] = v

                with open(localFilePath+fileName.format(nextObj,fileCount), 'a') as outfile :
                    outfile.write(json.dumps(tempdict))
                    outfile.write('\n')

        s3.put_s3_file_datedpath(localFilePath+fileName.format(nextObj,fileCount),s3Destination+s3Path.format(nextObj))
        os.remove(localFilePath+fileName.format(nextObj,fileCount))

    #################################################
    #   maps SFDC type to SQL type - used for ddl   #
    #################################################
    def map_data_type(sfdc_type, length):
            """
            Definition to map Salesforce datatype to Redshift datatype.
            """
            __MULTIPLIER = 1.3 # may not be Zero!

            if length == 0:
                length = 1092
            if length == 4095:
                length = 15000
            if length > 65535:
                length = 65534

            if sfdc_type == u'boolean':
               return u'varchar(5)'
            elif  sfdc_type == u'date':
               return u'timestamp'
            elif sfdc_type == u'datetime':
               return  u'timestamp'
            elif sfdc_type == u'currency':
               return  u'decimal(38,6)'
            elif sfdc_type == u'double':
               return  u'decimal(38,6)'
            elif sfdc_type == u'int':
               return  u'numeric(10)'
            elif sfdc_type == u'picklist':
               return  u'varchar({})'.format(length)
            elif sfdc_type == u'id':
               return  u'varchar({})'.format(length)
            elif sfdc_type == u'reference':
               return  u'varchar({})'.format(length)
            elif sfdc_type == u'textarea':
                if length >= (65535/length*__MULTIPLIER):
                    return  u'varchar({})'.format(65534)
                else:
                    return  u'varchar({})'.format( math.ceil(length*__MULTIPLIER))
            elif sfdc_type == u'email':
               return  u'varchar({})'.format(length)
            elif sfdc_type == u'phone':
               return  u'varchar({})'.format(length)
            elif sfdc_type == u'url':
               return  u'varchar({})'.format(length)
            elif sfdc_type == u'multipicklist':
               return  u'varchar({})'.format(length)
            elif sfdc_type == u'anyType':
                if length >= 65535:
                    return  u'varchar({})'.format(65534)
                else:
                    return  u'varchar({})'.format(math.ceil(length*__MULTIPLIER))
            elif sfdc_type == u'percent':
               return  u'numeric(38,6)'
            elif sfdc_type == u'combobox':
               return  u'varchar({})'.format(length)
            elif sfdc_type == u'base64':
               return  u'varchar({})'.format(length)
            elif sfdc_type == u'time':
               return  u'varchar(255)'
            elif sfdc_type == u'string':
                if length >= 65535:
                    return  u'varchar({})'.format(65534)
                else:
                    return  u'varchar({})'.format(math.ceil(length*__MULTIPLIER))
            else:
               return  u'varchar(65535)'

    ####################################
    #   Turn SFDC metadata into SQL    #
    ####################################
    def get_ddl(sf, nextObj, igersSchema, col_remove=None):
        md = sf.restful("sobjects/{}/describe/".format(nextObj), params=None)
        target_table=nextObj
        total_field_count = 0
        global ddl_str
        ddl_str = ''

        ddl_str += 'CREATE TABLE '+ igersSchema+"."+target_table +' ('
        for x in md["fields"]:
            #print x["name"]
            if col_remove: 
                if x["name"].lower() in [element.lower() for element in col_remove]:
                    print("Skipping: {}".format(x["name"]))
                    continue
            ddl_str += x["name"] + ' ' + map_data_type(x["type"],x["length"])
            if x["name"] == 'Id':
                 ddl_str += ' NOT NULL DISTKEY'
            ddl_str +=  ","              
            total_field_count = total_field_count + 1
        ddl_str = ddl_str[:-1]
        ddl_str += ')'
        logging.info('DDL Successfully created...')
    #    print("Total Field Count: "+str(total_field_count))
        return ddl_str

    #########################
    # Create Table from DDL, execute the copy query and update permissions #
    #########################
    def rs_operations(ddl_str, groupPerms, igersSchema, nextObj, s3Destination, s3Path, igers) :
        today = datetime.date.today()
        dated_path = today.strftime('%Y/%m/%d')
        perms_statement = ''    

        drop_table = '''
            DROP TABLE IF EXISTS {}.{} CASCADE
            '''.format(igersSchema, nextObj)

        loadQuery = '''
            COPY {}.{}
            FROM '{}{}/{}/'
            iam_role 'arn:aws:iam::087024238921:role/LambdaFullAccessRole'
            TRUNCATECOLUMNS
            FORMAT AS JSON 'auto'
        '''.format(igersSchema, nextObj, s3Destination, s3Path.format(nextObj), dated_path)

        grantPerms = '''
        GRANT SELECT ON {}.{} TO GROUP {}
        '''

        with igers.connect() as conn:
            try :
                conn.execute(drop_table)
                print('completed drop table')
                conn.execute(ddl_str)
                print('completed create table')
                conn.execute(loadQuery)
                print('completed load query')
                for row in range(len(groupPerms)) :
                    perms_statement = grantPerms.format(groupPerms['namespace'].iloc[row],groupPerms['item'].iloc[row],groupPerms['groname'].iloc[row])
                    conn.execute(perms_statement)
                print('completed grant group permissions')
                conn.close()
            except exc.SQLAlchemyError as e :
                print(e)  

    ######################################
    # Update Next Object and Write to S3 #
    ######################################
    def updateNextObj(nextObj, s3NextObjDestination) :
        objectsList = []
        objectsFile = s3.get_s3_file('s3://test-sfdc-sds-team/sfdc-etl-jp-test/sfdc_etl/objects/sfdc_etl_objects.txt',os.path.abspath(os.path.join(TMP_PATH,'sfdc_etl_objects.txt')))
        localNobjTempFile = os.path.abspath(os.path.join(TMP_PATH,'next_object.txt'))
        nextObjText = ''

        with open (objectsFile, 'r') as objs :
            for line in objs :
                objectsList.append(line.strip("\n"))

        for i in range(len(objectsList)-1) :
            if objectsList[i].lower() == nextObj :
                nextObjText = objectsList[i+1]
                print(nextObjText)

        with open (localNobjTempFile, 'w') as f :
            f.write(nextObjText)

        s3.put_s3_file(localNobjTempFile,s3NextObjDestination.format('next_object.txt'))
        print('completed Updating the next object')

    ################################################
    #     Test if the object exists and execute    #
    ################################################
    try :
        getattr(sf,nextObj).describe()
    except (SalesforceGeneralError, 
            SalesforceMoreThanOneRecord, 
            SalesforceMalformedRequest, 
            SalesforceExpiredSession, 
            SalesforceRefusedRequest, 
            SalesforceResourceNotFound) as e :
        print(e.content[0]['message'] +', writing next object and ending')
        updateNextObj(nextObj, s3NextObjDestination)
        sys.exit(1)

    rsGetGroupPerms(igers, nextObj)
    sfToS3(fileCount, sf, nextObj)
    get_ddl(sf, nextObj, igersSchema, col_remove=None)
    rs_operations(ddl_str, groupPerms, igersSchema, nextObj, s3Destination, s3Path, igers)
    updateNextObj(nextObj, s3NextObjDestination)

問題是以下幾行

sfdc_password = boto3.client('kms').decrypt(CiphertextBlob=b64decode(os.environ["L_PASSWORD"]))['Plaintext']

返回一個bytes對象,另一方面, Salesforce類需要一個字符串。

解:


try :
    sf = Salesforce(username=sfdc_username,
                    password=sfdc_password.decode("utf-8"),
                    security_token=sfdc_security_token,
                    instance_url=sfdc_salesforce_instance_url,
                    domain=sfdc_sandbox)
    print('salesforce login good')

請注意,使用.decode bytes方法將sfdc_password變量轉換為字符串

暫無
暫無

聲明:本站的技術帖子網頁,遵循CC BY-SA 4.0協議,如果您需要轉載,請注明本站網址或者原文地址。任何問題請咨詢:yoyou2525@163.com.

 
粵ICP備18138465號  © 2020-2024 STACKOOM.COM