[英]Using Terraform to create AWS ECS with target group always timing out
v0.11.3 v0.11.3
I'm setting up an ECS cluster with currently one service.我正在设置一个 ECS 集群,目前只有一项服务。 Had several issues getting the service up without breaking, but now my service can't seem to keep a container running.
在不中断的情况下启动服务时遇到了几个问题,但现在我的服务似乎无法保持容器运行。
service phoenix-web (instance i-079707fc669361a81) (port 80) is unhealthy in target-group tgqaphoenix-web due to (reason Request timed out)
Once my resources are up, I can't seem to find a public dns link on any instance or on the vpc gateway一旦我的资源启动,我似乎无法在任何实例或 vpc 网关上找到公共 dns 链接
data "template_file" "ecs_task_definition_config" {
template = "${file("config/ecs-task.json")}"
}
resource "aws_ecs_task_definition" "phoenix-web" {
lifecycle {
create_before_destroy = true
}
family = "nginx-phoenix-task"
container_definitions = "${data.template_file.ecs_task_definition_config.rendered}"
}
resource "aws_security_group" "main" {
vpc_id = "${var.vpc_id}"
tags {
Name = "sg${var.name}LoadBalancer"
Project = "${var.name}"
Environment = "${var.environment}"
}
}
resource "aws_security_group_rule" "app_lb_https_ingress" {
type = "ingress"
from_port = 80
to_port = 80
protocol = "tcp"
cidr_blocks = ["0.0.0.0/0"]
security_group_id = "${aws_security_group.main.id}"
}
resource "aws_alb" "main" {
security_groups = ["${aws_security_group.main.id}"]
subnets = ["${var.public_subnet_ids}"]
name = "alb-${var.environment}-${var.name}"
access_logs {
bucket = "${var.access_log_bucket}"
prefix = "${var.access_log_prefix}"
}
tags {
Name = "alb-${var.environment}-${var.name}"
Project = "${var.name}"
Environment = "${var.environment}"
}
}
resource "aws_alb_target_group" "main" {
name = "tg${var.environment}${var.name}"
health_check {
healthy_threshold = "3"
interval = "30"
protocol = "HTTP"
timeout = "3"
path = "/healthz"
unhealthy_threshold = "2"
}
port = "80"
protocol = "HTTP"
vpc_id = "${var.vpc_id}"
tags {
Name = "tg${var.environment}${var.name}"
Project = "${var.name}"
Environment = "${var.environment}"
}
depends_on = ["aws_alb.main"]
}
resource "aws_alb_listener" "https" {
load_balancer_arn = "${aws_alb.main.id}"
port = "80"
protocol = "HTTP"
default_action {
target_group_arn = "${aws_alb_target_group.main.id}"
type = "forward"
}
}
resource "aws_ecs_service" "service" {
lifecycle {
create_before_destroy = true
}
name = "${var.name}"
cluster = "${var.environment}"
task_definition = "${aws_ecs_task_definition.phoenix-web.id}"
desired_count = "${var.desired_count}"
deployment_minimum_healthy_percent = "${var.deployment_min_healthy_percent}"
deployment_maximum_percent = "${var.deployment_max_percent}"
iam_role = "${aws_iam_role.ecs-role.id}"
load_balancer {
target_group_arn = "${aws_alb_target_group.main.id}"
container_name = "phoenix-web"
container_port = "80"
}
depends_on = ["aws_iam_role.ecs-role", "null_resource.alb_exists"]
}
resource "aws_iam_role_policy" "ecs-policy" {
name = "ecs-policy"
role = "${aws_iam_role.ecs-role.id}"
policy = <<EOF
{
"Version": "2012-10-17",
"Statement": [
{
"Effect": "Allow",
"Action": [
"ecs:CreateCluster",
"ecs:DeregisterContainerInstance",
"ecs:DiscoverPollEndpoint",
"ecs:Poll",
"ecs:RegisterContainerInstance",
"ecs:StartTelemetrySession",
"ecs:Submit*",
"ecr:GetAuthorizationToken",
"ecr:BatchCheckLayerAvailability",
"ecr:GetDownloadUrlForLayer",
"ecr:BatchGetImage",
"ec2:AuthorizeSecurityGroupIngress",
"ec2:Describe*",
"elasticloadbalancing:DeregisterInstancesFromLoadBalancer",
"elasticloadbalancing:Describe*",
"elasticloadbalancing:RegisterInstancesWithLoadBalancer",
"elasticloadbalancing:RegisterTargets",
"elasticloadbalancing:DeregisterTargets"
],
"Resource": "*"
}
]
}
EOF
depends_on = ["aws_iam_role.ecs-role"]
}
resource "aws_iam_role" "ecs-role" {
name = "ecs-role"
assume_role_policy = <<EOF
{
"Version": "2012-10-17",
"Statement": [
{
"Action": "sts:AssumeRole",
"Principal": {
"Service": "ecs.amazonaws.com"
},
"Effect": "Allow",
"Sid": ""
}
]
}
EOF
}
resource "aws_appautoscaling_target" "main" {
service_namespace = "ecs"
resource_id = "service/${var.environment}/${var.name}"
scalable_dimension = "ecs:service:DesiredCount"
role_arn = "${aws_iam_role.ecs-role.arn}"
min_capacity = "${var.min_count}"
max_capacity = "${var.max_count}"
depends_on = [
"aws_ecs_service.service",
]
}
resource "null_resource" "alb_exists" {
triggers {
alb_name = "${aws_alb_target_group.main.id}"
}
}
module "s3-log-storage" {
source = "cloudposse/s3-log-storage/aws"
version = "0.1.3"
# insert the 3 required variables here
namespace = "mmt-ecs"
stage = "${var.environment}"
name = "logs-bucket"
policy = <<POLICY
{
"Id": "Policy1519319575520",
"Version": "2012-10-17",
"Statement": [
{
"Sid": "Stmt1519319570434",
"Action": [
"s3:PutObject",
"s3:PutObjectAcl",
"s3:PutObjectTagging",
"s3:PutObjectVersionAcl",
"s3:PutObjectVersionTagging"
],
"Effect": "Allow",
"Resource": "arn:aws:s3:::mmt-ecs-qa-logs-bucket/*",
"Principal": "*"
}
]
}
POLICY
}
module "network" {
source = "../network"
environment = "${var.environment}"
vpc_cidr = "${var.vpc_cidr}"
public_subnet_cidrs = "${var.public_subnet_cidrs}"
private_subnet_cidrs = "${var.private_subnet_cidrs}"
availability_zones = "${var.availability_zones}"
depends_id = ""
}
module "ecs_instances" {
source = "../ecs_instances"
environment = "${var.environment}"
cluster = "${var.cluster}"
instance_group = "${var.instance_group}"
private_subnet_ids = "${module.network.private_subnet_ids}"
aws_ami = "${var.ecs_aws_ami}"
instance_type = "${var.instance_type}"
max_size = "${var.max_size}"
min_size = "${var.min_size}"
desired_capacity = "${var.desired_capacity}"
vpc_id = "${module.network.vpc_id}"
iam_instance_profile_id = "${aws_iam_instance_profile.ecs.id}"
key_name = "${var.key_name}"
load_balancers = "${var.load_balancers}"
depends_id = "${module.network.depends_id}"
custom_userdata = "${var.custom_userdata}"
cloudwatch_prefix = "${var.cloudwatch_prefix}"
}
module "web-phoenix-service" {
source = "../services/web-phoenix"
environment = "${var.environment}"
vpc_id = "${module.network.vpc_id}"
public_subnet_ids = "${module.network.public_subnet_ids}"
name = "phoenix-web"
deployment_max_percent = "200"
deployment_min_healthy_percent = "100"
max_count = "2"
min_count = "1"
desired_count = "1"
ecs_service_role_name = "${aws_iam_instance_profile.ecs.id}"
access_log_bucket = "${module.s3-log-storage.bucket_id}"
access_log_prefix = "ALB"
}
resource "aws_ecs_cluster" "cluster" {
name = "${var.cluster}"
}
It seems the application health check is failing ie /healthz.似乎应用程序运行状况检查失败,即 /healthz。 You start debugging issue like below:
您开始调试问题,如下所示:
1) Spin up a container in your local and check whether it is working or not. 1) 在本地启动一个容器并检查它是否正常工作。 Per your health check info above, you should be able to access application like http://someip:port/healthz If this works 2) Are you exposing port 80 while building docker image ?
根据您上面的健康检查信息,您应该能够访问像http://someip:port/healthz这样的应用程序如果可行2) 您在构建docker 镜像时是否暴露了端口 80? Check in docker file.
签入 docker 文件。 3) if above two steps seems okay, then try accessing your application by using EC S instance ip as soon as task is running.
3) 如果以上两个步骤看起来没问题,那么在任务运行后立即尝试使用 EC S 实例 ip 访问您的应用程序。 http://ecsinstanceip:port/healthz .
http://ecsinstanceip:port/healthz 。 4) If 3 also works, they try increasing the health check timeout period so that the application gets more time to pass its health check..
4) 如果 3 也有效,他们会尝试增加健康检查超时时间,以便应用程序有更多时间通过健康检查。
Make sure that the ECS container instance's security group is able to accept ports 1024-65535 inside the VPN (don't open it for the outside world)确保ECS容器实例的安全组能够接受VPN内部的1024-65535端口(不要对外开放)
On the task definition for the portMappings
specify it like:在
portMappings
的任务定义portMappings
指定为:
"portMappings": [
{
"hostPort": 0,
"protocol": "tcp",
"containerPort": 80
}
],
Note here: containerPort
is what you expose
from your container, where you app is listening with its healthcheck hostPort
would be what port you bind for forwarding on the host.请注意:
containerPort
是您从容器中expose
的内容,您的应用程序正在侦听其健康检查hostPort
将是您绑定的用于在主机上转发的端口。 Leave it 0 an it will be automatically assigned by ECS, that's why you need to open 1024-65535 on the SG.留0,ECS会自动分配,这就是你需要在SG上打开1024-65535的原因。 This is needed so you will be able to run the same task definition multiple times on the same instance (scale horizontally).
这是必需的,因此您将能够在同一个实例上多次运行相同的任务定义(水平扩展)。
声明:本站的技术帖子网页,遵循CC BY-SA 4.0协议,如果您需要转载,请注明本站网址或者原文地址。任何问题请咨询:yoyou2525@163.com.