
Role assumption errors with boto3, AWS WorkDocs, and Lambda execution with Python

I have confirmed the following policy and trust relationship in account-b:

Trust Relationship

{
  "Version": "2012-10-17",
  "Statement": [
    {
      "Effect": "Allow",
      "Principal": {
        "AWS": "arn:aws:sts::account-b:assumed-role/WorkDocs_API_Developer/workdocs_session"
      },
      "Action": "sts:AssumeRole",
      "Condition": {}
    }
  ]
}

And the associated policy:

{
    "Version": "2012-10-17",
    "Statement": [
        {
            "Sid": "VisualEditor0",
            "Effect": "Allow",
            "Action": [
                "workdocs:GetDocumentPath",
                "workdocs:GetCurrentUser",
                "workdocs:CreateNotificationSubscription",
                "workdocs:DescribeAvailableDirectories",
                "workdocs:UpdateFolder",
                "workdocs:CheckAlias",
                "workdocs:DownloadDocumentVersion",
                "workdocs:GetResources",
                "workdocs:DescribeActivities",
                "workdocs:DescribeRootFolders",
                "workdocs:UpdateDocument",
                "workdocs:CreateFolder",
                "workdocs:GetFolder",
                "workdocs:InitiateDocumentVersionUpload",
                "workdocs:DescribeResourcePermissions",
                "workdocs:DescribeDocumentVersions",
                "workdocs:CreateLabels",
                "workdocs:DescribeGroups",
                "workdocs:DescribeNotificationSubscriptions",
                "workdocs:DescribeFolderContents",
                "workdocs:AbortDocumentVersionUpload",
                "workdocs:DescribeComments",
                "workdocs:GetDocumentVersion",
                "workdocs:AddResourcePermissions",
                "workdocs:DescribeInstances",
                "workdocs:GetDocument",
                "workdocs:DescribeUsers",
                "workdocs:CreateComment",
                "workdocs:CreateCustomMetadata",
                "workdocs:UpdateDocumentVersion",
                "workdocs:GetFolderPath"
            ],
            "Resource": "*"
        }
    ]
}

Here is my current code:

import os
import sys

# This adds the parent directory of bin so we can find the module
parent_dir = os.path.abspath(os.path.join(os.path.dirname(__file__), os.path.pardir))
sys.path.append(parent_dir)
# This adds the venv's lib/pythonX.Y/site-packages/ to the search path
mod_path = os.path.abspath(parent_dir+"/lib/python"+str(sys.version_info[0])+"."+str(sys.version_info[1])+"/site-packages/")
sys.path.append(mod_path)

from base64 import b64decode
import boto3
from io import BytesIO
import datetime
from openpyxl import load_workbook
import pandas as pd
import sqlalchemy
from sqlalchemy import create_engine
import requests

workdocs_region = 'us-west-2'
services_region = 'us-east-1'
wd_role_arn = 'arn:aws:iam::account-b:role/WorkDocs_API_Developer'


def assume_role(wd_role_arn):
    sts = boto3.client('sts')
    creds = sts.assume_role(RoleArn=wd_role_arn,
                            RoleSessionName='workdocs_session'
                            )['Credentials']
    return creds

def get_data():
    role = assume_role(wd_role_arn) 
    client = boto3.client('workdocs',
                          aws_access_key_id=role['AccessKeyId'],
                          aws_secret_access_key=role['SecretAccessKey'],
                          aws_session_token=role['SessionToken'],
                          region_name=workdocs_region
                          )

    folder_id = os.environ['FOLDER_ID']
    doc_id = os.environ['DOC_ID']

    if not doc_id:   
        documents = client.describe_folder_contents(FolderId = folder_id)
        file = os.environ['FILE_NAME'].replace(' ','_')
        for d in documents['Documents']:
            if file in d['LatestVersionMetadata']['Name'].replace(' ','_'):
                print(d['LatestVersionMetadata']['Id'])
                doc_id = d['LatestVersionMetadata']['Id']

    doc_meta = client.get_document(
                    DocumentId=doc_id
                    )

    latest_doc = client.get_document_version(
                    DocumentId=doc_meta['Metadata']['Id'],
                    VersionId=doc_meta['Metadata']['LatestVersionMetadata']['Id'],
                    Fields='SOURCE'
                    )

    document_url = latest_doc['Metadata']['Source']['ORIGINAL']
    document_name = latest_doc['Metadata']['Name']
    r = requests.get(document_url)

    wb = load_workbook(filename=BytesIO(r.content))
    for s in wb.sheetnames:
        ws = wb[s]
        data = ws.values
        columns = next(data)[0:]
        columns = [item.lower().replace(' ', '_').strip() for item in columns]
        df = pd.DataFrame(data, columns=columns)
        df['snapshot_date'] = datetime.date.today()
        if os.environ['OUT_LOCATION'] in ['Redshift', 'redshift', 'rs'] :
            redshift_load(df)
        elif os.environ['OUT_LOCATION'] in ['S3' , 's3'] :
            s3_load(df, s)

def redshift_load(df):
    rs = os.environ['REDSHIFT_INSTANCE']
    rs_port = os.environ['REDSHIFT_PORT']
    rs_db = os.environ['REDSHIFT_DB']
    rs_user = os.environ['REDSHIFT_USER']
    rs_password = boto3.client('kms', region_name=services_region).decrypt(CiphertextBlob=b64decode(os.environ['REDSHIFT_PASSWORD']))['Plaintext']
    engine = create_engine('postgresql://{}:{}@{}:{}/{}'.format(rs_user,rs_password.decode('utf-8'),rs,rs_port,rs_db))
    schema = os.environ['SCHEMA_NAME']
    table = os.environ['TABLE_NAME']
    df.to_sql(table, engine, schema, if_exists='append', index=False, method='multi')

def s3_load(df, sheet):
    session = boto3.Session()
    creds = session.get_credentials()
    client = boto3.client('s3',
                          aws_access_key_id=creds.access_key,
                          aws_secret_access_key=creds.secret_key,
                          aws_session_token=creds.token,
                          region_name=services_region,
                          )        

    csv = df.to_csv(index=False)
    key = datetime.datetime.today().strftime('%Y/%m/%d') + '/' + sheet + '.csv'
    client.put_object(Bucket=os.environ['BUCKET'], Key=key, Body=csv)


def lambda_handler(event, context):
    get_data()

Can anyone tell me why I would get this error:

"errorMessage": "An error occurred (UnauthorizedResourceAccessException) when calling the GetDocument operation: Principal [arn:aws:sts::289497978546:assumed-role/WorkDocs_API_Developer/workdocs_session] is not allowed to execute [workdocs:GetDocument] on the resource"

I cannot for the life of me figure it out.
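
One thing the message does tell me is that the AssumeRole call itself succeeds, since the principal it names is the assumed-role session. To rule out a credential mix-up, here is a minimal sanity check (just a sketch; sts.get_caller_identity takes no arguments and simply reports which principal a set of credentials belongs to):

import boto3

wd_role_arn = 'arn:aws:iam::account-b:role/WorkDocs_API_Developer'

# Assume the cross-account role exactly as in assume_role() above
sts = boto3.client('sts')
creds = sts.assume_role(RoleArn=wd_role_arn,
                        RoleSessionName='workdocs_session')['Credentials']

# Ask STS who these credentials belong to; the Arn printed here should
# match the principal named in the error message
identity = boto3.client('sts',
                        aws_access_key_id=creds['AccessKeyId'],
                        aws_secret_access_key=creds['SecretAccessKey'],
                        aws_session_token=creds['SessionToken']
                        ).get_caller_identity()
print(identity['Arn'])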

The Lambda execution role has the following in account-a:

Trust policy:

{
  "Version": "2012-10-17",
  "Statement": [
    {
      "Effect": "Allow",
      "Principal": {
        "Service": "lambda.amazonaws.com",
        "AWS": [
          "arn:aws:sts::account-b:assumed-role/WorkDocs_API_Developer/workdocs_session",
          "arn:aws:iam::account-b:role/WorkDocs_API_Developer",
          "arn:aws:sts::account-a:assumed-role/Lambda-WorkDocs/corpworkdocs_api_pull_infra"
        ]
      },
      "Action": "sts:AssumeRole"
    }
  ]
}

Permissions policy:

{
    "Version": "2012-10-17",
    "Statement": [
        {
            "Action": [
                "workdocs:*"
            ],
            "Effect": "Allow",
            "Resource": [
                "*",
                "arn:aws:sts::account-b:assumed-role/WorkDocs_API_Developer/workdocs_session"
            ]
        }
    ]
}

Is there something I didn't do in my code? Is there some kind of problem with the trust or resource policies?

The answer is that I was using the incorrect ID: the existing code was pulling Id from LatestVersionMetadata, but that is not the document ID at all (it is the ID of the latest document version), so GetDocument failed with the misleading authorization error above.
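
For reference, here is the (abridged) shape of a describe_folder_contents response; the two Id fields are easy to confuse (the values below are placeholders):

documents = client.describe_folder_contents(FolderId=folder_id)
# Abridged response shape, values are placeholders:
# {
#     'Documents': [
#         {
#             'Id': 'document-id',                 # the actual DocumentId
#             'ParentFolderId': 'folder-id',
#             'LatestVersionMetadata': {
#                 'Id': 'document-version-id',     # a version ID, not the DocumentId
#                 'Name': 'My Spreadsheet.xlsx',
#                 ...
#             }
#         }
#     ],
#     'Folders': [...]
# }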

You have to go back up to the Document entry itself for the correct Id. Here is the working code:

import os
import sys

# This adds the parent directory of bin so we can find the module
parent_dir = os.path.abspath(os.path.join(os.path.dirname(__file__), os.path.pardir))
sys.path.append(parent_dir)
# This adds the venv's lib/pythonX.Y/site-packages/ to the search path
mod_path = os.path.abspath(parent_dir+"/lib/python"+str(sys.version_info[0])+"."+str(sys.version_info[1])+"/site-packages/")
sys.path.append(mod_path)

from base64 import b64decode
import boto3
from io import BytesIO
import datetime
from openpyxl import load_workbook
import pandas as pd
import sqlalchemy
from sqlalchemy import create_engine
import requests

workdocs_region = 'us-west-2'
services_region = 'us-east-1'
wd_role_arn = 'arn:aws:iam::account-b:role/WorkDocs_API_Developer'


def assume_role(wd_role_arn):
    sts = boto3.client('sts')
    creds = sts.assume_role(RoleArn=wd_role_arn,
                            RoleSessionName='workdocs_session'
                            )['Credentials']
    return creds

def get_data():
    role = assume_role(wd_role_arn) 
    client = boto3.client('workdocs',
                          aws_access_key_id=role['AccessKeyId'],
                          aws_secret_access_key=role['SecretAccessKey'],
                          aws_session_token=role['SessionToken'],
                          region_name=workdocs_region
                          )

    folder_id = os.environ['FOLDER_ID']
    doc_id = os.environ['DOC_ID']

    if not doc_id:   
        documents = client.describe_folder_contents(FolderId = folder_id)
        file = os.environ['FILE_NAME'].replace(' ','_')
        for d in documents['Documents']:
            if file in d['LatestVersionMetadata']['Name'].replace(' ','_'):
                doc_id = d['Id']

    doc_meta = client.get_document(
                    DocumentId=doc_id
                    )

    latest_doc = client.get_document_version(
                    DocumentId=doc_meta['Metadata']['Id'],
                    VersionId=doc_meta['Metadata']['LatestVersionMetadata']['Id'],
                    Fields='SOURCE'
                    )

    document_url = latest_doc['Metadata']['Source']['ORIGINAL']
    document_name = latest_doc['Metadata']['Name']
    r = requests.get(document_url)

    wb = load_workbook(filename=BytesIO(r.content))
    for s in wb.sheetnames:
        ws = wb[s]
        data = ws.values
        columns = next(data)[0:]
        columns = [item.lower().replace(' ', '_').strip() for item in columns]
        df = pd.DataFrame(data, columns=columns)
        df['snapshot_date'] = datetime.date.today()
        if os.environ['OUT_LOCATION'] in ['Redshift', 'redshift', 'rs'] :
            redshift_load(df)
        elif os.environ['OUT_LOCATION'] in ['S3' , 's3'] :
            s3_load(df, s)

def redshift_load(df):
    rs = os.environ['REDSHIFT_INSTANCE']
    rs_port = os.environ['REDSHIFT_PORT']
    rs_db = os.environ['REDSHIFT_DB']
    rs_user = os.environ['REDSHIFT_USER']
    rs_password = boto3.client('kms', region_name=services_region).decrypt(CiphertextBlob=b64decode(os.environ['REDSHIFT_PASSWORD']))['Plaintext']
    engine = create_engine('postgresql://{}:{}@{}:{}/{}'.format(rs_user,rs_password.decode('utf-8'),rs,rs_port,rs_db))
    schema = os.environ['SCHEMA_NAME']
    table = os.environ['TABLE_NAME']
    df.to_sql(table, engine, schema, if_exists='append', index=False, method='multi')

def s3_load(df, sheet):
    session = boto3.Session()
    creds = session.get_credentials()
    client = boto3.client('s3',
                          aws_access_key_id=creds.access_key,
                          aws_secret_access_key=creds.secret_key,
                          aws_session_token=creds.token,
                          region_name=services_region,
                          )        

    csv = df.to_csv(index=False)
    key = datetime.datetime.today().strftime('%Y/%m/%d') + '/' + sheet + '.csv'
    client.put_object(Bucket=os.environ['BUCKET'], Key=key, Body=csv)


def lambda_handler(event, context):
    get_data()
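
For clarity, the only functional change from the original code is inside the folder-contents loop:

# before (wrong: this is the latest version's ID)
doc_id = d['LatestVersionMetadata']['Id']

# after (correct: the document ID lives on the Document entry itself)
doc_id = d['Id']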
