
Role assumption errors with boto3, aws workdocs, and Lambda execution with python

I have confirmed the following policy and trust relationship in account-b.

Trust relationship:

{
  "Version": "2012-10-17",
  "Statement": [
    {
      "Effect": "Allow",
      "Principal": {
        "AWS": "arn:aws:sts::account-b:assumed-role/WorkDocs_API_Developer/workdocs_session"
      },
      "Action": "sts:AssumeRole",
      "Condition": {}
    }
  ]
}

And the associated policy:

{
    "Version": "2012-10-17",
    "Statement": [
        {
            "Sid": "VisualEditor0",
            "Effect": "Allow",
            "Action": [
                "workdocs:GetDocumentPath",
                "workdocs:GetCurrentUser",
                "workdocs:CreateNotificationSubscription",
                "workdocs:DescribeAvailableDirectories",
                "workdocs:UpdateFolder",
                "workdocs:CheckAlias",
                "workdocs:DownloadDocumentVersion",
                "workdocs:GetResources",
                "workdocs:DescribeActivities",
                "workdocs:DescribeRootFolders",
                "workdocs:UpdateDocument",
                "workdocs:CreateFolder",
                "workdocs:GetFolder",
                "workdocs:InitiateDocumentVersionUpload",
                "workdocs:DescribeResourcePermissions",
                "workdocs:DescribeDocumentVersions",
                "workdocs:CreateLabels",
                "workdocs:DescribeGroups",
                "workdocs:DescribeNotificationSubscriptions",
                "workdocs:DescribeFolderContents",
                "workdocs:AbortDocumentVersionUpload",
                "workdocs:DescribeComments",
                "workdocs:GetDocumentVersion",
                "workdocs:AddResourcePermissions",
                "workdocs:DescribeInstances",
                "workdocs:GetDocument",
                "workdocs:DescribeUsers",
                "workdocs:CreateComment",
                "workdocs:CreateCustomMetadata",
                "workdocs:UpdateDocumentVersion",
                "workdocs:GetFolderPath"
            ],
            "Resource": "*"
        }
    ]
}

Here is my current code:

import os
import sys

# this adds the parent directory of bin so we can find the module
parent_dir = os.path.abspath(os.path.join(os.path.dirname(__file__), os.path.pardir))
sys.path.append(parent_dir)
# This adds venv lib/python3.6/site-packages/ to the search path
mod_path = os.path.abspath(parent_dir+"/lib/python"+str(sys.version_info[0])+"."+str(sys.version_info[1])+"/site-packages/")
sys.path.append(mod_path)

from base64 import b64decode
import boto3
from io import BytesIO
import datetime
from openpyxl import load_workbook
import pandas as pd
import sqlalchemy
from sqlalchemy import create_engine
import requests

workdocs_region ='us-west-2'
services_region = 'us-east-1'
wd_role_arn = 'arn:aws:iam::account-b:role/WorkDocs_API_Developer'


def assume_role(wd_role_arn):
    sts = boto3.client('sts')
    creds = sts.assume_role(RoleArn=wd_role_arn,
                            RoleSessionName='workdocs_session'
                            )['Credentials']
    return creds

def get_data():
    role = assume_role(wd_role_arn) 
    client = boto3.client('workdocs',
                          aws_access_key_id=role['AccessKeyId'],
                          aws_secret_access_key=role['SecretAccessKey'],
                          aws_session_token=role['SessionToken'],
                          region_name=workdocs_region
                          )

    folder_id = os.environ['FOLDER_ID']
    doc_id = os.environ['DOC_ID']

    if not doc_id:   
        documents = client.describe_folder_contents(FolderId = folder_id)
        file = os.environ['FILE_NAME'].replace(' ','_')
        for d in documents['Documents']:
            if file in d['LatestVersionMetadata']['Name'].replace(' ','_'):
                print(d['LatestVersionMetadata']['Id'])
                doc_id = d['LatestVersionMetadata']['Id']

    doc_meta = client.get_document(
                    DocumentId=doc_id
                    )

    latest_doc = client.get_document_version(
                    DocumentId=doc_meta['Metadata']['Id'],
                    VersionId=doc_meta['Metadata']['LatestVersionMetadata']['Id'],
                    Fields='SOURCE'
                    )

    document_url = latest_doc['Metadata']['Source']['ORIGINAL']
    document_name = latest_doc['Metadata']['Name']
    r = requests.get(document_url)

    wb = load_workbook(filename=BytesIO(r.content))
    for s in wb.sheetnames:
        ws = wb[s]
        data = ws.values
        columns = next(data)[0:]
        columns = [item.lower().replace(' ', '_').strip() for item in columns]
        df = pd.DataFrame(data, columns=columns)
        df['snapshot_date'] = datetime.date.today()
        if os.environ['OUT_LOCATION'] in ['Redshift', 'redshift', 'rs'] :
            redshift_load(df)
        elif os.environ['OUT_LOCATION'] in ['S3' , 's3'] :
            s3_load(df, s)

def redshift_load(df):
    rs = os.environ['REDSHIFT_INSTANCE']
    rs_port = os.environ['REDSHIFT_PORT']
    rs_db = os.environ['REDSHIFT_DB']
    rs_user = os.environ['REDSHIFT_USER']
    rs_password = boto3.client('kms', region_name=services_region).decrypt(CiphertextBlob=b64decode(os.environ['REDSHIFT_PASSWORD']))['Plaintext']
    engine = create_engine('postgresql://{}:{}@{}:{}/{}'.format(rs_user,rs_password.decode('utf-8'),rs,rs_port,rs_db))
    schema = os.environ['SCHEMA_NAME']
    table = os.environ['TABLE_NAME']
    df.to_sql(table, engine, schema, if_exists='append', index=False, method='multi')

def s3_load(df, sheet):
    session = boto3.Session()
    creds = session.get_credentials()
    client = boto3.client('s3',
                          aws_access_key_id=creds.access_key,
                          aws_secret_access_key=creds.secret_key,
                          aws_session_token=creds.token,
                          region_name=services_region,
                          )        

    csv = df.to_csv(index=False)
    key = datetime.datetime.today().strftime('%Y/%m/%d') + '/' + sheet + '.csv'
    client.put_object(Bucket=os.environ['BUCKET'], Key=key, Body=csv)


def lambda_handler(event, context) :          
    get_data()

Can anyone tell me why I am getting this error:

"errorMessage": "An error occurred (UnauthorizedResourceAccessException) when calling the GetDocument operation: Principal [arn:aws:sts::289497978546:assumed-role/WorkDocs_API_Developer/workdocs_session] is not allowed to execute [workdocs:GetDocument] on the resource"

I cannot figure it out for the life of me.
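Notably, the principal named in the error matches the assumed-role session exactly, so the AssumeRole call itself appears to succeed. As a sanity check (a minimal sketch reusing the assume_role helper and wd_role_arn from the code above), printing the caller identity confirms which principal the credentials actually map to:

role = assume_role(wd_role_arn)
sts_check = boto3.client('sts',
                         aws_access_key_id=role['AccessKeyId'],
                         aws_secret_access_key=role['SecretAccessKey'],
                         aws_session_token=role['SessionToken'])
# prints arn:aws:sts::<account-b>:assumed-role/WorkDocs_API_Developer/workdocs_session
print(sts_check.get_caller_identity()['Arn'])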

The lambda execution role in account-a has the following.

Trust policy:

{
  "Version": "2012-10-17",
  "Statement": [
    {
      "Effect": "Allow",
      "Principal": {
        "Service": "lambda.amazonaws.com",
        "AWS": [
          "arn:aws:sts::account-b:assumed-role/WorkDocs_API_Developer/workdocs_session",
          "arn:aws:iam::account-b:role/WorkDocs_API_Developer",
          "arn:aws:sts::account-a:assumed-role/Lambda-WorkDocs/corpworkdocs_api_pull_infra"
        ]
      },
      "Action": "sts:AssumeRole"
    }
  ]
}

Resource policy:

{
    "Version": "2012-10-17",
    "Statement": [
        {
            "Action": [
                "workdocs:*"
            ],
            "Effect": "Allow",
            "Resource": [
                "*",
                "arn:aws:sts::account-b:assumed-role/WorkDocs_API_Developer/workdocs_session"
            ]
        }
    ]
}

Is there something I am not doing in my code? Or is there some kind of issue with the trust or resource policies?

The answer is that I was using the wrong ID. The existing code pulled the Id from LatestVersionMetadata, and that is not the actual document ID. I do not know what the Id in that section refers to, but it is not the document's.
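The one-line difference inside the folder-contents loop (everything else is unchanged):

# wrong: this is the Id of the latest document *version*
doc_id = d['LatestVersionMetadata']['Id']

# right: each entry carries the document's own Id at its top level
doc_id = d['Id']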

You have to go back to the document itself to get the correct ID. Here is the full code:

import os
import sys

# this adds the parent directory of bin so we can find the module
parent_dir = os.path.abspath(os.path.join(os.path.dirname(__file__), os.path.pardir))
sys.path.append(parent_dir)
# This adds venv lib/python3.6/site-packages/ to the search path
mod_path = os.path.abspath(parent_dir+"/lib/python"+str(sys.version_info[0])+"."+str(sys.version_info[1])+"/site-packages/")
sys.path.append(mod_path)

from base64 import b64decode
import boto3
from io import BytesIO
import datetime
from openpyxl import load_workbook
import pandas as pd
import sqlalchemy
from sqlalchemy import create_engine
import requests

workdocs_region ='us-west-2'
services_region = 'us-east-1'
wd_role_arn = 'arn:aws:iam::account-b:role/WorkDocs_API_Developer'


def assume_role(wd_role_arn):
    sts = boto3.client('sts')
    creds = sts.assume_role(RoleArn=wd_role_arn,
                            RoleSessionName='workdocs_session'
                            )['Credentials']
    return creds

def get_data():
    role = assume_role(wd_role_arn) 
    client = boto3.client('workdocs',
                          aws_access_key_id=role['AccessKeyId'],
                          aws_secret_access_key=role['SecretAccessKey'],
                          aws_session_token=role['SessionToken'],
                          region_name=workdocs_region
                          )

    folder_id = os.environ['FOLDER_ID']
    doc_id = os.environ['DOC_ID']

    if not doc_id:   
        documents = client.describe_folder_contents(FolderId = folder_id)
        file = os.environ['FILE_NAME'].replace(' ','_')
        for d in documents['Documents']:
            if file in d['LatestVersionMetadata']['Name'].replace(' ','_'):
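                # d['Id'] is the document Id; d['LatestVersionMetadata']['Id'] is a version Id,
                # which is what triggered the UnauthorizedResourceAccessException above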
                doc_id = d['Id']

    doc_meta = client.get_document(
                    DocumentId=doc_id
                    )

    latest_doc = client.get_document_version(
                    DocumentId=doc_meta['Metadata']['Id'],
                    VersionId=doc_meta['Metadata']['LatestVersionMetadata']['Id'],
                    Fields='SOURCE'
                    )

    document_url = latest_doc['Metadata']['Source']['ORIGINAL']
    document_name = latest_doc['Metadata']['Name']
    r = requests.get(document_url)

    wb = load_workbook(filename=BytesIO(r.content))
    for s in wb.sheetnames:
        ws = wb[s]
        data = ws.values
        columns = next(data)[0:]
        columns = [item.lower().replace(' ', '_').strip() for item in columns]
        df = pd.DataFrame(data, columns=columns)
        df['snapshot_date'] = datetime.date.today()
        if os.environ['OUT_LOCATION'] in ['Redshift', 'redshift', 'rs'] :
            redshift_load(df)
        elif os.environ['OUT_LOCATION'] in ['S3' , 's3'] :
            s3_load(df, s)

def redshift_load(df):
    rs = os.environ['REDSHIFT_INSTANCE']
    rs_port = os.environ['REDSHIFT_PORT']
    rs_db = os.environ['REDSHIFT_DB']
    rs_user = os.environ['REDSHIFT_USER']
    rs_password = boto3.client('kms', region_name=services_region).decrypt(CiphertextBlob=b64decode(os.environ['REDSHIFT_PASSWORD']))['Plaintext']
    engine = create_engine('postgresql://{}:{}@{}:{}/{}'.format(rs_user,rs_password.decode('utf-8'),rs,rs_port,rs_db))
    schema = os.environ['SCHEMA_NAME']
    table = os.environ['TABLE_NAME']
    df.to_sql(table, engine, schema, if_exists='append', index=False, method='multi')

def s3_load(df, sheet):
    session = boto3.Session()
    creds = session.get_credentials()
    client = boto3.client('s3',
                          aws_access_key_id=creds.access_key,
                          aws_secret_access_key=creds.secret_key,
                          aws_session_token=creds.token,
                          region_name=services_region,
                          )        

    csv = df.to_csv(index=False)
    key = datetime.datetime.today().strftime('%Y/%m/%d') + '/' + sheet + '.csv'
    client.put_object(Bucket=os.environ['BUCKET'], Key=key, Body=csv)


def lambda_handler(event, context) :          
    get_data()
