简体   繁体   中英

Export DynamoDB to S3 AWS Data Pipeline in us-east-2

I'd like to back up (and later import) a DynamoDB table to S3. The DynamoDB table exists in us-east-2, but this is an unsupported region for AWS Data Pipeline. The AWS docs seem to indicate that this shouldn't be a problem, but I can't seem to get the data pipeline to look for the table in us-east-2.

Here's an export of my data pipeline. When I run this, I get a 'resource not found error' when looking up the dynamodb table. If I temporarily create a table with the same name in us-west-2 where this data pipeline is running, the job works, but pulls the data from the table in us-west-2 instead of us-east-2. Any way to get this job to pull from the region specified in the configuration?

{
  "objects": [
    {
      "readThroughputPercent": "#{myDDBReadThroughputRatio}",
      "name": "DDBSourceTable",
      "id": "DDBSourceTable",
      "type": "DynamoDBDataNode",
      "region": "us-east-2",
      "tableName": "#{myDDBTableName}"
    },
    {
      "period": "6 Hours",
      "name": "Every 6 hours",
      "id": "DefaultSchedule",
      "type": "Schedule",
      "startAt": "FIRST_ACTIVATION_DATE_TIME"
    },
    {
      "bootstrapAction": "s3://us-west-2.elasticmapreduce/bootstrap-actions/configure-hadoop, --yarn-key-value,yarn.nodemanager.resource.memory-mb=11520,--yarn-key-value,yarn.scheduler.maximum-allocation-mb=11520,--yarn-key-value,yarn.scheduler.minimum-allocation-mb=1440,--yarn-key-value,yarn.app.mapreduce.am.resource.mb=2880,--mapred-key-value,mapreduce.map.memory.mb=5760,--mapred-key-value,mapreduce.map.java.opts=-Xmx4608M,--mapred-key-value,mapreduce.reduce.memory.mb=2880,--mapred-key-value,mapreduce.reduce.java.opts=-Xmx2304m,--mapred-key-value,mapreduce.map.speculative=false",
      "name": "EmrClusterForBackup",
      "coreInstanceCount": "1",
      "coreInstanceType": "m3.xlarge",
      "amiVersion": "3.9.0",
      "masterInstanceType": "m3.xlarge",
      "id": "EmrClusterForBackup",
      "region": "us-west-2",
      "type": "EmrCluster",
      "terminateAfter": "1 Hour"
    },
    {
      "directoryPath": "#{myOutputS3Loc}/#{format(@scheduledStartTime, 'YYYY-MM-dd-HH-mm-ss')}",
      "name": "S3BackupLocation",
      "id": "S3BackupLocation",
      "type": "S3DataNode"
    },
    {
      "output": {
        "ref": "S3BackupLocation"
      },
      "input": {
        "ref": "DDBSourceTable"
      },
      "maximumRetries": "2",
      "name": "TableBackupActivity",
      "step": "s3://dynamodb-emr-us-west-2/emr-ddb-storage-handler/2.1.0/emr-ddb-2.1.0.jar,org.apache.hadoop.dynamodb.tools.DynamoDbExport,#{output.directoryPath},#{input.tableName},#{input.readThroughputPercent}",
      "id": "TableBackupActivity",
      "runsOn": {
        "ref": "EmrClusterForBackup"
      },
      "type": "EmrActivity",
      "resizeClusterBeforeRunning": "true"
    },
    {
      "failureAndRerunMode": "CASCADE",
      "schedule": {
        "ref": "DefaultSchedule"
      },
      "resourceRole": "data_pipeline_etl_role",
      "pipelineLogUri": "s3://MY_S3_BUCKET/",
      "role": "data_pipeline_pipeline_role",
      "scheduleType": "cron",
      "name": "Default",
      "id": "Default"
    }
  ],
  "parameters": [
    {
      "description": "Output S3 folder",
      "id": "myOutputS3Loc",
      "type": "AWS::S3::ObjectKey"
    },
    {
      "description": "Source DynamoDB table name",
      "id": "myDDBTableName",
      "type": "String"
    },
    {
      "default": "0.25",
      "watermark": "Enter value between 0.1-1.0",
      "description": "DynamoDB read throughput ratio",
      "id": "myDDBReadThroughputRatio",
      "type": "Double"
    },
    {
      "default": "us-east-1",
      "watermark": "us-east-1",
      "description": "Region of the DynamoDB table",
      "id": "myDDBRegion",
      "type": "String"
    }
  ],
  "values": {
    "myDDBRegion": "us-east-2",
    "myDDBTableName": "prod--users",
    "myDDBReadThroughputRatio": "0.25",
    "myOutputS3Loc": "s3://MY_S3_BUCKET"
  }
}

同样的问题,在us-east-2中找不到DynamoDB表

Is it a one-off, or something you want to do continuously? Could you use DynamoDB global tables to replicate the table in a supported region, then just remove the replica once your backup is done?

Global table replication is free. You should just pay for the capacity on your replicated table while it's up and running.

https://docs.aws.amazon.com/amazondynamodb/latest/developerguide/GlobalTables.html

The technical post webpages of this site follow the CC BY-SA 4.0 protocol. If you need to reprint, please indicate the site URL or the original address. For any questions, please contact: yoyou2525@163.com.

 
粤ICP备18138465号  © 2020-2024 STACKOOM.COM