[英]Using Terraform to create AWS ECS with target group always timing out
v0.11.3
我正在设置一个 ECS 集群,目前只有一项服务。 在不中断的情况下启动服务时遇到了几个问题,但现在我的服务似乎无法保持容器运行。
service phoenix-web (instance i-079707fc669361a81) (port 80) is unhealthy in target-group tgqaphoenix-web due to (reason Request timed out)
一旦我的资源启动,我似乎无法在任何实例或 vpc 网关上找到公共 dns 链接
data "template_file" "ecs_task_definition_config" {
template = "${file("config/ecs-task.json")}"
}
resource "aws_ecs_task_definition" "phoenix-web" {
lifecycle {
create_before_destroy = true
}
family = "nginx-phoenix-task"
container_definitions = "${data.template_file.ecs_task_definition_config.rendered}"
}
resource "aws_security_group" "main" {
vpc_id = "${var.vpc_id}"
tags {
Name = "sg${var.name}LoadBalancer"
Project = "${var.name}"
Environment = "${var.environment}"
}
}
resource "aws_security_group_rule" "app_lb_https_ingress" {
type = "ingress"
from_port = 80
to_port = 80
protocol = "tcp"
cidr_blocks = ["0.0.0.0/0"]
security_group_id = "${aws_security_group.main.id}"
}
resource "aws_alb" "main" {
security_groups = ["${aws_security_group.main.id}"]
subnets = ["${var.public_subnet_ids}"]
name = "alb-${var.environment}-${var.name}"
access_logs {
bucket = "${var.access_log_bucket}"
prefix = "${var.access_log_prefix}"
}
tags {
Name = "alb-${var.environment}-${var.name}"
Project = "${var.name}"
Environment = "${var.environment}"
}
}
resource "aws_alb_target_group" "main" {
name = "tg${var.environment}${var.name}"
health_check {
healthy_threshold = "3"
interval = "30"
protocol = "HTTP"
timeout = "3"
path = "/healthz"
unhealthy_threshold = "2"
}
port = "80"
protocol = "HTTP"
vpc_id = "${var.vpc_id}"
tags {
Name = "tg${var.environment}${var.name}"
Project = "${var.name}"
Environment = "${var.environment}"
}
depends_on = ["aws_alb.main"]
}
resource "aws_alb_listener" "https" {
load_balancer_arn = "${aws_alb.main.id}"
port = "80"
protocol = "HTTP"
default_action {
target_group_arn = "${aws_alb_target_group.main.id}"
type = "forward"
}
}
resource "aws_ecs_service" "service" {
lifecycle {
create_before_destroy = true
}
name = "${var.name}"
cluster = "${var.environment}"
task_definition = "${aws_ecs_task_definition.phoenix-web.id}"
desired_count = "${var.desired_count}"
deployment_minimum_healthy_percent = "${var.deployment_min_healthy_percent}"
deployment_maximum_percent = "${var.deployment_max_percent}"
iam_role = "${aws_iam_role.ecs-role.id}"
load_balancer {
target_group_arn = "${aws_alb_target_group.main.id}"
container_name = "phoenix-web"
container_port = "80"
}
depends_on = ["aws_iam_role.ecs-role", "null_resource.alb_exists"]
}
resource "aws_iam_role_policy" "ecs-policy" {
name = "ecs-policy"
role = "${aws_iam_role.ecs-role.id}"
policy = <<EOF
{
"Version": "2012-10-17",
"Statement": [
{
"Effect": "Allow",
"Action": [
"ecs:CreateCluster",
"ecs:DeregisterContainerInstance",
"ecs:DiscoverPollEndpoint",
"ecs:Poll",
"ecs:RegisterContainerInstance",
"ecs:StartTelemetrySession",
"ecs:Submit*",
"ecr:GetAuthorizationToken",
"ecr:BatchCheckLayerAvailability",
"ecr:GetDownloadUrlForLayer",
"ecr:BatchGetImage",
"ec2:AuthorizeSecurityGroupIngress",
"ec2:Describe*",
"elasticloadbalancing:DeregisterInstancesFromLoadBalancer",
"elasticloadbalancing:Describe*",
"elasticloadbalancing:RegisterInstancesWithLoadBalancer",
"elasticloadbalancing:RegisterTargets",
"elasticloadbalancing:DeregisterTargets"
],
"Resource": "*"
}
]
}
EOF
depends_on = ["aws_iam_role.ecs-role"]
}
resource "aws_iam_role" "ecs-role" {
name = "ecs-role"
assume_role_policy = <<EOF
{
"Version": "2012-10-17",
"Statement": [
{
"Action": "sts:AssumeRole",
"Principal": {
"Service": "ecs.amazonaws.com"
},
"Effect": "Allow",
"Sid": ""
}
]
}
EOF
}
resource "aws_appautoscaling_target" "main" {
service_namespace = "ecs"
resource_id = "service/${var.environment}/${var.name}"
scalable_dimension = "ecs:service:DesiredCount"
role_arn = "${aws_iam_role.ecs-role.arn}"
min_capacity = "${var.min_count}"
max_capacity = "${var.max_count}"
depends_on = [
"aws_ecs_service.service",
]
}
resource "null_resource" "alb_exists" {
triggers {
alb_name = "${aws_alb_target_group.main.id}"
}
}
module "s3-log-storage" {
source = "cloudposse/s3-log-storage/aws"
version = "0.1.3"
# insert the 3 required variables here
namespace = "mmt-ecs"
stage = "${var.environment}"
name = "logs-bucket"
policy = <<POLICY
{
"Id": "Policy1519319575520",
"Version": "2012-10-17",
"Statement": [
{
"Sid": "Stmt1519319570434",
"Action": [
"s3:PutObject",
"s3:PutObjectAcl",
"s3:PutObjectTagging",
"s3:PutObjectVersionAcl",
"s3:PutObjectVersionTagging"
],
"Effect": "Allow",
"Resource": "arn:aws:s3:::mmt-ecs-qa-logs-bucket/*",
"Principal": "*"
}
]
}
POLICY
}
module "network" {
source = "../network"
environment = "${var.environment}"
vpc_cidr = "${var.vpc_cidr}"
public_subnet_cidrs = "${var.public_subnet_cidrs}"
private_subnet_cidrs = "${var.private_subnet_cidrs}"
availability_zones = "${var.availability_zones}"
depends_id = ""
}
module "ecs_instances" {
source = "../ecs_instances"
environment = "${var.environment}"
cluster = "${var.cluster}"
instance_group = "${var.instance_group}"
private_subnet_ids = "${module.network.private_subnet_ids}"
aws_ami = "${var.ecs_aws_ami}"
instance_type = "${var.instance_type}"
max_size = "${var.max_size}"
min_size = "${var.min_size}"
desired_capacity = "${var.desired_capacity}"
vpc_id = "${module.network.vpc_id}"
iam_instance_profile_id = "${aws_iam_instance_profile.ecs.id}"
key_name = "${var.key_name}"
load_balancers = "${var.load_balancers}"
depends_id = "${module.network.depends_id}"
custom_userdata = "${var.custom_userdata}"
cloudwatch_prefix = "${var.cloudwatch_prefix}"
}
module "web-phoenix-service" {
source = "../services/web-phoenix"
environment = "${var.environment}"
vpc_id = "${module.network.vpc_id}"
public_subnet_ids = "${module.network.public_subnet_ids}"
name = "phoenix-web"
deployment_max_percent = "200"
deployment_min_healthy_percent = "100"
max_count = "2"
min_count = "1"
desired_count = "1"
ecs_service_role_name = "${aws_iam_instance_profile.ecs.id}"
access_log_bucket = "${module.s3-log-storage.bucket_id}"
access_log_prefix = "ALB"
}
resource "aws_ecs_cluster" "cluster" {
name = "${var.cluster}"
}
似乎应用程序运行状况检查失败,即 /healthz。 您开始调试问题,如下所示:
1) 在本地启动一个容器并检查它是否正常工作。 根据您上面的健康检查信息,您应该能够访问像http://someip:port/healthz这样的应用程序如果可行2) 您在构建docker 镜像时是否暴露了端口 80? 签入 docker 文件。 3) 如果以上两个步骤看起来没问题,那么在任务运行后立即尝试使用 EC S 实例 ip 访问您的应用程序。 http://ecsinstanceip:port/healthz 。 4) 如果 3 也有效,他们会尝试增加健康检查超时时间,以便应用程序有更多时间通过健康检查。
确保ECS容器实例的安全组能够接受VPN内部的1024-65535端口(不要对外开放)
在portMappings
的任务定义portMappings
指定为:
"portMappings": [
{
"hostPort": 0,
"protocol": "tcp",
"containerPort": 80
}
],
请注意: containerPort
是您从容器中expose
的内容,您的应用程序正在侦听其健康检查hostPort
将是您绑定的用于在主机上转发的端口。 留0,ECS会自动分配,这就是你需要在SG上打开1024-65535的原因。 这是必需的,因此您将能够在同一个实例上多次运行相同的任务定义(水平扩展)。
声明:本站的技术帖子网页,遵循CC BY-SA 4.0协议,如果您需要转载,请注明本站网址或者原文地址。任何问题请咨询:yoyou2525@163.com.