[英]Cannot run queries against AWS Athena and Glue database
我用 Terraform 搭建了一个 DynamoDB -> AWS Glue -> Athena 的堆栈。我可以看到所有列都已在 AWS Glue 中创建,表也存在于其中;但在 Athena 中查看时,似乎只有数据库存在。即使数据库、表结构和列在查询界面中都能看到,查询仍然无法执行。
-- Count the distinct ids for each tenant.
SELECT
    tenant,
    COUNT(DISTINCT id) AS counts
FROM "account-profiles-glue-db"."account_profiles"
GROUP BY tenant
我的 Terraform 配置如下:
# Names for every resource in this stack, all derived from the DynamoDB
# table name supplied through var.table-name.
locals {
  table-name = var.table-name

  # Athena: results bucket and workgroup.
  athena-results-s3-name = "${local.table-name}-analytics"
  # Direct reference instead of the redundant "${...}" interpolation-only form.
  athena-workgroup-name = local.table-name

  # Glue: catalog database, crawler, and the crawler's IAM role and policy.
  glue-db-name             = "${local.table-name}-glue-db"
  glue-crawler-name        = "${local.table-name}-crawler"
  glue-crawler-role-name   = "${local.table-name}-crawler-role"
  glue-crawler-policy-name = "${local.table-name}-crawler"
}
# Customer-managed KMS key. Elsewhere in this configuration it is used as the
# default encryption key of the analytics S3 bucket and for the Athena
# workgroup's query results.
resource "aws_kms_key" "aws_kms_key" {
  deletion_window_in_days = 10
  description             = "KMS key for whole project"
}
##################################################################
# glue
##################################################################
# Glue Data Catalog database that the crawler writes its tables into.
resource "aws_glue_catalog_database" "aws_glue_catalog_database" {
  name = local.glue-db-name
}
# Glue crawler targeting the DynamoDB table named by local.table-name; its
# output goes to the Glue catalog database declared in this configuration.
# NOTE(review): Athena may not be able to query a catalog table whose source
# is DynamoDB -- confirm whether the data must first be exported to S3.
resource "aws_glue_crawler" "aws_glue_crawler" {
  name          = local.glue-crawler-name
  database_name = aws_glue_catalog_database.aws_glue_catalog_database.name
  role          = aws_iam_role.aws_iam_role_glue_crawler.arn

  dynamodb_target {
    path = local.table-name
  }

  # Crawler configuration document, serialized to JSON.
  configuration = jsonencode({
    Version = 1.0
    CrawlerOutput = {
      Partitions = {
        AddOrUpdateBehavior = "InheritFromTable"
      }
    }
  })
}
# IAM role the Glue crawler runs as. The trust policy lets only the Glue
# service principal assume it.
resource "aws_iam_role" "aws_iam_role_glue_crawler" {
  name = local.glue-crawler-role-name

  assume_role_policy = jsonencode({
    Version = "2012-10-17"
    Statement = [
      {
        Sid       = ""
        Effect    = "Allow"
        Action    = "sts:AssumeRole"
        Principal = { Service = "glue.amazonaws.com" }
      }
    ]
  })
}
# Inline permissions for the crawler role.
#
# The previous policy allowed every action on every resource, i.e. full
# administrator access. It is replaced by the permissions a DynamoDB crawler
# needs: read access to the source table, Glue catalog access, and the
# ability to write its own logs.
# NOTE(review): if the DynamoDB table is encrypted with a customer-managed
# KMS key, kms:Decrypt on that key must be added -- confirm.
resource "aws_iam_role_policy" "aws_iam_role_policy_glue_crawler" {
  name = local.glue-crawler-policy-name
  role = aws_iam_role.aws_iam_role_glue_crawler.id

  policy = jsonencode({
    Version = "2012-10-17"
    Statement = [
      {
        # Read the schema and sample items of the crawled table only.
        Sid      = "ReadSourceTable"
        Effect   = "Allow"
        Action   = ["dynamodb:DescribeTable", "dynamodb:Scan"]
        Resource = ["arn:aws:dynamodb:*:*:table/${local.table-name}"]
      },
      {
        # Create and update databases, tables and partitions in the catalog.
        # Can be narrowed further to specific catalog ARNs if desired.
        Sid      = "ManageGlueCatalog"
        Effect   = "Allow"
        Action   = ["glue:*"]
        Resource = ["*"]
      },
      {
        # Crawler run logs go to log groups under /aws-glue/.
        Sid    = "WriteCrawlerLogs"
        Effect = "Allow"
        Action = [
          "logs:CreateLogGroup",
          "logs:CreateLogStream",
          "logs:PutLogEvents"
        ]
        Resource = ["arn:aws:logs:*:*:*:/aws-glue/*"]
      }
    ]
  })
}
##################################################################
# athena
##################################################################
# Private, versioned bucket that receives Athena query results; objects are
# encrypted by default with the project KMS key.
# NOTE(review): the inline acl / versioning / server_side_encryption_configuration
# arguments are deprecated in newer AWS provider releases in favour of
# separate resources -- confirm the provider version before upgrading.
resource "aws_s3_bucket" "aws_s3_bucket_analytics" {
  bucket = local.athena-results-s3-name
  acl    = "private"

  server_side_encryption_configuration {
    rule {
      apply_server_side_encryption_by_default {
        sse_algorithm     = "aws:kms"
        kms_master_key_id = aws_kms_key.aws_kms_key.arn
      }
    }
  }

  versioning {
    enabled = true
  }
}
# Athena workgroup whose settings are enforced on its queries: results are
# written under output/ in the analytics bucket, encrypted with SSE-KMS using
# the project key, and CloudWatch metrics are published.
resource "aws_athena_workgroup" "aws_athena_workgroup" {
  name = local.athena-workgroup-name

  configuration {
    publish_cloudwatch_metrics_enabled = true
    enforce_workgroup_configuration    = true

    result_configuration {
      output_location = "s3://${aws_s3_bucket.aws_s3_bucket_analytics.bucket}/output/"

      encryption_configuration {
        kms_key_arn       = aws_kms_key.aws_kms_key.arn
        encryption_option = "SSE_KMS"
      }
    }
  }
}
根据您提供的 Terraform 配置和 AWS 的 Glue 文档,您只是用爬网程序(crawler)爬取了 DynamoDB 表,并没有为它触发任何作业。Glue 作业是您运行业务逻辑以转换和加载数据的地方,也是您声明将源数据发送到 S3 以供 Athena 读取的地方。
如果您在为 Glue 作业生成代码时需要帮助,我建议您使用带有可视化编辑器的 Glue Studio,该编辑器也可以为您生成代码。您可以选择数据源、目标以及所需的任何转换。之后,您可以使用 Terraform 的 aws_glue_job 资源,并引用您在 Glue Studio 中生成的脚本。
除非您需要对数据执行一些 ETL,否则请考虑使用 AWSLabs GitHub 中提供的 Athena-DynamoDB-Connector,将 Athena 直接连接到 DynamoDB。您也可以将 DynamoDB 数据导出到 S3,然后让 Athena 连接到该 S3 存储桶。
声明:本站的技术帖子网页,遵循CC BY-SA 4.0协议,如果您需要转载,请注明本站网址或者原文地址。任何问题请咨询:yoyou2525@163.com.