v0.11.3
I'm setting up an ECS cluster with currently one service. Had several issues getting the service up without breaking, but now my service can't seem to keep a container running.
service phoenix-web (instance i-079707fc669361a81) (port 80) is unhealthy in target-group tgqaphoenix-web due to (reason Request timed out)
Once my resources are up, I can't seem to find a public dns link on any instance or on the vpc gateway
data "template_file" "ecs_task_definition_config" {
template = "${file("config/ecs-task.json")}"
}
resource "aws_ecs_task_definition" "phoenix-web" {
lifecycle {
create_before_destroy = true
}
family = "nginx-phoenix-task"
container_definitions = "${data.template_file.ecs_task_definition_config.rendered}"
}
resource "aws_security_group" "main" {
vpc_id = "${var.vpc_id}"
tags {
Name = "sg${var.name}LoadBalancer"
Project = "${var.name}"
Environment = "${var.environment}"
}
}
resource "aws_security_group_rule" "app_lb_https_ingress" {
type = "ingress"
from_port = 80
to_port = 80
protocol = "tcp"
cidr_blocks = ["0.0.0.0/0"]
security_group_id = "${aws_security_group.main.id}"
}
resource "aws_alb" "main" {
security_groups = ["${aws_security_group.main.id}"]
subnets = ["${var.public_subnet_ids}"]
name = "alb-${var.environment}-${var.name}"
access_logs {
bucket = "${var.access_log_bucket}"
prefix = "${var.access_log_prefix}"
}
tags {
Name = "alb-${var.environment}-${var.name}"
Project = "${var.name}"
Environment = "${var.environment}"
}
}
resource "aws_alb_target_group" "main" {
name = "tg${var.environment}${var.name}"
health_check {
healthy_threshold = "3"
interval = "30"
protocol = "HTTP"
timeout = "3"
path = "/healthz"
unhealthy_threshold = "2"
}
port = "80"
protocol = "HTTP"
vpc_id = "${var.vpc_id}"
tags {
Name = "tg${var.environment}${var.name}"
Project = "${var.name}"
Environment = "${var.environment}"
}
depends_on = ["aws_alb.main"]
}
resource "aws_alb_listener" "https" {
load_balancer_arn = "${aws_alb.main.id}"
port = "80"
protocol = "HTTP"
default_action {
target_group_arn = "${aws_alb_target_group.main.id}"
type = "forward"
}
}
resource "aws_ecs_service" "service" {
lifecycle {
create_before_destroy = true
}
name = "${var.name}"
cluster = "${var.environment}"
task_definition = "${aws_ecs_task_definition.phoenix-web.id}"
desired_count = "${var.desired_count}"
deployment_minimum_healthy_percent = "${var.deployment_min_healthy_percent}"
deployment_maximum_percent = "${var.deployment_max_percent}"
iam_role = "${aws_iam_role.ecs-role.id}"
load_balancer {
target_group_arn = "${aws_alb_target_group.main.id}"
container_name = "phoenix-web"
container_port = "80"
}
depends_on = ["aws_iam_role.ecs-role", "null_resource.alb_exists"]
}
resource "aws_iam_role_policy" "ecs-policy" {
name = "ecs-policy"
role = "${aws_iam_role.ecs-role.id}"
policy = <<EOF
{
"Version": "2012-10-17",
"Statement": [
{
"Effect": "Allow",
"Action": [
"ecs:CreateCluster",
"ecs:DeregisterContainerInstance",
"ecs:DiscoverPollEndpoint",
"ecs:Poll",
"ecs:RegisterContainerInstance",
"ecs:StartTelemetrySession",
"ecs:Submit*",
"ecr:GetAuthorizationToken",
"ecr:BatchCheckLayerAvailability",
"ecr:GetDownloadUrlForLayer",
"ecr:BatchGetImage",
"ec2:AuthorizeSecurityGroupIngress",
"ec2:Describe*",
"elasticloadbalancing:DeregisterInstancesFromLoadBalancer",
"elasticloadbalancing:Describe*",
"elasticloadbalancing:RegisterInstancesWithLoadBalancer",
"elasticloadbalancing:RegisterTargets",
"elasticloadbalancing:DeregisterTargets"
],
"Resource": "*"
}
]
}
EOF
depends_on = ["aws_iam_role.ecs-role"]
}
resource "aws_iam_role" "ecs-role" {
name = "ecs-role"
assume_role_policy = <<EOF
{
"Version": "2012-10-17",
"Statement": [
{
"Action": "sts:AssumeRole",
"Principal": {
"Service": "ecs.amazonaws.com"
},
"Effect": "Allow",
"Sid": ""
}
]
}
EOF
}
resource "aws_appautoscaling_target" "main" {
service_namespace = "ecs"
resource_id = "service/${var.environment}/${var.name}"
scalable_dimension = "ecs:service:DesiredCount"
role_arn = "${aws_iam_role.ecs-role.arn}"
min_capacity = "${var.min_count}"
max_capacity = "${var.max_count}"
depends_on = [
"aws_ecs_service.service",
]
}
resource "null_resource" "alb_exists" {
triggers {
alb_name = "${aws_alb_target_group.main.id}"
}
}
module "s3-log-storage" {
source = "cloudposse/s3-log-storage/aws"
version = "0.1.3"
# insert the 3 required variables here
namespace = "mmt-ecs"
stage = "${var.environment}"
name = "logs-bucket"
policy = <<POLICY
{
"Id": "Policy1519319575520",
"Version": "2012-10-17",
"Statement": [
{
"Sid": "Stmt1519319570434",
"Action": [
"s3:PutObject",
"s3:PutObjectAcl",
"s3:PutObjectTagging",
"s3:PutObjectVersionAcl",
"s3:PutObjectVersionTagging"
],
"Effect": "Allow",
"Resource": "arn:aws:s3:::mmt-ecs-qa-logs-bucket/*",
"Principal": "*"
}
]
}
POLICY
}
module "network" {
source = "../network"
environment = "${var.environment}"
vpc_cidr = "${var.vpc_cidr}"
public_subnet_cidrs = "${var.public_subnet_cidrs}"
private_subnet_cidrs = "${var.private_subnet_cidrs}"
availability_zones = "${var.availability_zones}"
depends_id = ""
}
module "ecs_instances" {
source = "../ecs_instances"
environment = "${var.environment}"
cluster = "${var.cluster}"
instance_group = "${var.instance_group}"
private_subnet_ids = "${module.network.private_subnet_ids}"
aws_ami = "${var.ecs_aws_ami}"
instance_type = "${var.instance_type}"
max_size = "${var.max_size}"
min_size = "${var.min_size}"
desired_capacity = "${var.desired_capacity}"
vpc_id = "${module.network.vpc_id}"
iam_instance_profile_id = "${aws_iam_instance_profile.ecs.id}"
key_name = "${var.key_name}"
load_balancers = "${var.load_balancers}"
depends_id = "${module.network.depends_id}"
custom_userdata = "${var.custom_userdata}"
cloudwatch_prefix = "${var.cloudwatch_prefix}"
}
module "web-phoenix-service" {
source = "../services/web-phoenix"
environment = "${var.environment}"
vpc_id = "${module.network.vpc_id}"
public_subnet_ids = "${module.network.public_subnet_ids}"
name = "phoenix-web"
deployment_max_percent = "200"
deployment_min_healthy_percent = "100"
max_count = "2"
min_count = "1"
desired_count = "1"
ecs_service_role_name = "${aws_iam_instance_profile.ecs.id}"
access_log_bucket = "${module.s3-log-storage.bucket_id}"
access_log_prefix = "ALB"
}
resource "aws_ecs_cluster" "cluster" {
name = "${var.cluster}"
}
It seems the application health check is failing ie /healthz. You start debugging issue like below:
1) Spin up a container in your local and check whether it is working or not. Per your health check info above, you should be able to access application like http://someip:port/healthz If this works 2) Are you exposing port 80 while building docker image ? Check in docker file. 3) if above two steps seems okay, then try accessing your application by using EC S instance ip as soon as task is running. http://ecsinstanceip:port/healthz . 4) If 3 also works, they try increasing the health check timeout period so that the application gets more time to pass its health check..
Make sure that the ECS container instance's security group is able to accept ports 1024-65535 inside the VPN (don't open it for the outside world)
On the task definition for the portMappings
specify it like:
"portMappings": [
{
"hostPort": 0,
"protocol": "tcp",
"containerPort": 80
}
],
Note here: containerPort
is what you expose
from your container, where you app is listening with its healthcheck hostPort
would be what port you bind for forwarding on the host. Leave it 0 an it will be automatically assigned by ECS, that's why you need to open 1024-65535 on the SG. This is needed so you will be able to run the same task definition multiple times on the same instance (scale horizontally).
The technical post webpages of this site follow the CC BY-SA 4.0 protocol. If you need to reprint, please indicate the site URL or the original address.Any question please contact:yoyou2525@163.com.