
Terraform dial tcp dns error while creating AWS ALB ingress with EKS cluster

I am trying to use Terraform to create an AWS EKS cluster with an ALB load balancer and a Kubernetes ingress.

I have been using this git repo and this blog to guide me.

The deployment fails with the following errors immediately after the cluster has been created.

Error: Post "https://E8475B1B3693C979073BF0D721D876A7.sk1.ap-southeast-1.eks.amazonaws.com/api/v1/namespaces/kube-system/configmaps": dial tcp: lookup E8475B1B3693C979073BF0D721D876A7.sk1.ap-southeast-1.eks.amazonaws.com on 8.8.8.8:53: no such host

  on modules/alb/alb_ingress_controller.tf line 1, in resource "kubernetes_config_map" "aws_auth":
   1: resource "kubernetes_config_map" "aws_auth" {


Error: Post "https://E8475B1B3693C979073BF0D721D876A7.sk1.ap-southeast-1.eks.amazonaws.com/apis/rbac.authorization.k8s.io/v1/clusterroles": dial tcp: lookup E8475B1B3693C979073BF0D721D876A7.sk1.ap-southeast-1.eks.amazonaws.com on 8.8.8.8:53: no such host

  on modules/alb/alb_ingress_controller.tf line 20, in resource "kubernetes_cluster_role" "alb-ingress":
  20: resource "kubernetes_cluster_role" "alb-ingress" {


Error: Post "https://E8475B1B3693C979073BF0D721D876A7.sk1.ap-southeast-1.eks.amazonaws.com/apis/rbac.authorization.k8s.io/v1/clusterrolebindings": dial tcp: lookup E8475B1B3693C979073BF0D721D876A7.sk1.ap-southeast-1.eks.amazonaws.com on 8.8.8.8:53: no such host

  on modules/alb/alb_ingress_controller.tf line 41, in resource "kubernetes_cluster_role_binding" "alb-ingress":
  41: resource "kubernetes_cluster_role_binding" "alb-ingress" {


Error: Post "https://E8475B1B3693C979073BF0D721D876A7.sk1.ap-southeast-1.eks.amazonaws.com/api/v1/namespaces/kube-system/serviceaccounts": dial tcp: lookup E8475B1B3693C979073BF0D721D876A7.sk1.ap-southeast-1.eks.amazonaws.com on 8.8.8.8:53: no such host

  on modules/alb/alb_ingress_controller.tf line 62, in resource "kubernetes_service_account" "alb-ingress":
  62: resource "kubernetes_service_account" "alb-ingress" {


Error: Failed to create Ingress 'default/main-ingress' because: Post "https://E8475B1B3693C979073BF0D721D876A7.sk1.ap-southeast-1.eks.amazonaws.com/apis/extensions/v1beta1/namespaces/default/ingresses": dial tcp: lookup E8475B1B3693C979073BF0D721D876A7.sk1.ap-southeast-1.eks.amazonaws.com on 8.8.8.8:53: no such host

  on modules/alb/kubernetes_ingress.tf line 1, in resource "kubernetes_ingress" "main":
   1: resource "kubernetes_ingress" "main" {


Error: Post "https://641480DEC80EB445C6CBBEDC9D1F0234.yl4.ap-southeast-1.eks.amazonaws.com/api/v1/namespaces/kube-system/configmaps": dial tcp 10.0.21.192:443: connect: no route to host

  on modules/eks/allow_nodes.tf line 22, in resource "kubernetes_config_map" "aws_auth":
  22: resource "kubernetes_config_map" "aws_auth" {

Here is my Terraform code:

provider "aws" {
  region  = var.aws_region
  version = "~> 2.65.0"

  ignore_tags { 
    keys = ["kubernetes.io/role/internal-elb", "app.kubernetes.io/name"]
    key_prefixes = ["kubernetes.io/cluster/", "alb.ingress.kubernetes.io/"]
  }
}

resource "kubernetes_config_map" "aws_auth" {
  metadata {
    name = "aws-auth"
    namespace = "kube-system"
  }
  data = {
    mapRoles = <<EOF
- rolearn: ${var.iam_role_node}
  username: system:node:{{EC2PrivateDNSName}}
  groups:
    - system:bootstrappers
    - system:nodes
EOF
  }
  depends_on = [
    var.eks_cluster_name
  ]
}

resource "kubernetes_cluster_role" "alb-ingress" {
  metadata {
    name = "alb-ingress-controller"
    labels = {
      "app.kubernetes.io/name" = "alb-ingress-controller"
    }
  }

  rule {
    api_groups = ["", "extensions"]
    resources  = ["configmaps", "endpoints", "events", "ingresses", "ingresses/status", "services"]
    verbs      = ["create", "get", "list", "update", "watch", "patch"]
  }

  rule {
    api_groups = ["", "extensions"]
    resources  = ["nodes", "pods", "secrets", "services", "namespaces"]
    verbs      = ["get", "list", "watch"]
  }
}

resource "kubernetes_cluster_role_binding" "alb-ingress" {
  metadata {
    name = "alb-ingress-controller"
    labels = {
      "app.kubernetes.io/name" = "alb-ingress-controller"
    }
  }

  role_ref {
    api_group = "rbac.authorization.k8s.io"
    kind      = "ClusterRole"
    name      = "alb-ingress-controller"
  }

  subject {
    kind      = "ServiceAccount"
    name      = "alb-ingress-controller"
    namespace = "kube-system"
  }
}

resource "kubernetes_service_account" "alb-ingress" {
  metadata {
    name = "alb-ingress-controller"
    namespace = "kube-system"
    labels = {
      "app.kubernetes.io/name" = "alb-ingress-controller"
    }
  }

  automount_service_account_token = true
}

resource "kubernetes_deployment" "alb-ingress" {
  metadata {
    name = "alb-ingress-controller"
    labels = {
      "app.kubernetes.io/name" = "alb-ingress-controller"
    }
    namespace = "kube-system"
  }

  spec {
    selector {
      match_labels = {
        "app.kubernetes.io/name" = "alb-ingress-controller"
      }
    }

    template {
      metadata {
        labels = {
          "app.kubernetes.io/name" = "alb-ingress-controller"
        }
      }
      spec {
        volume {
          name = kubernetes_service_account.alb-ingress.default_secret_name
          secret {
            secret_name = kubernetes_service_account.alb-ingress.default_secret_name
          }
        }
        container {
          # This is where you change the version when Amazon comes out with a new version of the ingress controller
          image = "docker.io/amazon/aws-alb-ingress-controller:v1.1.7"
          name  = "alb-ingress-controller"
          args = [
            "--ingress-class=alb",
            "--cluster-name=${var.eks_cluster_name}",
            "--aws-vpc-id=${var.vpc_id}",
            "--aws-region=${var.aws_region}"]
        }

        service_account_name = "alb-ingress-controller"

      }
    }
  }
}

########################################################################################
# setup provider for kubernetes

//data "external" "aws_iam_authenticator" {
//  program = ["sh", "-c", "aws-iam-authenticator token -i ${var.cluster_name} | jq -r -c .status"]
//}

data "aws_eks_cluster_auth" "tf_eks_cluster" {
  name = aws_eks_cluster.tf_eks_cluster.name
}

provider "kubernetes" {
  host                      = aws_eks_cluster.tf_eks_cluster.endpoint
  cluster_ca_certificate    = base64decode(aws_eks_cluster.tf_eks_cluster.certificate_authority.0.data)
  //token                   = data.external.aws_iam_authenticator.result.token
  token                     = data.aws_eks_cluster_auth.tf_eks_cluster.token
  load_config_file          = false
  version = "~> 1.9"
}

# Allow worker nodes to join cluster via config map
resource "kubernetes_config_map" "aws_auth" {
  metadata {
    name = "aws-auth"
    namespace = "kube-system"
  }

  data = {
    mapRoles = <<EOF
- rolearn: ${aws_iam_role.tf-eks-node.arn}
  username: system:node:{{EC2PrivateDNSName}}
  groups:
    - system:bootstrappers
    - system:nodes
EOF
  }
  depends_on = [aws_eks_cluster.tf_eks_cluster, aws_autoscaling_group.tf_eks_cluster] 
}



resource "kubernetes_ingress" "main" {
  metadata {
    name = "main-ingress"
    annotations = {
      "alb.ingress.kubernetes.io/scheme" = "internet-facing"
      "kubernetes.io/ingress.class" = "alb"
      "alb.ingress.kubernetes.io/subnets" = var.app_subnet_stringlist
      "alb.ingress.kubernetes.io/certificate-arn" = "${data.aws_acm_certificate.api.arn}, ${data.aws_acm_certificate.gitea.arn}"
      "alb.ingress.kubernetes.io/listen-ports" = <<JSON
[
  {"HTTP": 80},
  {"HTTPS": 443}
]
JSON
      "alb.ingress.kubernetes.io/actions.ssl-redirect" = <<JSON
{
  "Type": "redirect",
  "RedirectConfig": {
    "Protocol": "HTTPS",
    "Port": "443",
    "StatusCode": "HTTP_301"
  }
}
JSON
    }
  }

  spec {
    rule {
      host = "api.xactpos.com"
      http {
        path {
          backend {
            service_name = "ssl-redirect"
            service_port = "use-annotation"
          }
          path = "/*"
        }
        path {
          backend {
            service_name = "app-service1"
            service_port = 80
          }
          path = "/service1"
        }
        path {
          backend {
            service_name = "app-service2"
            service_port = 80
          }
          path = "/service2"
        }
      }
    }

    rule {
      host = "gitea.xactpos.com"
      http {
        path {
          backend {
            service_name = "ssl-redirect"
            service_port = "use-annotation"
          }
          path = "/*"
        }
        path {
          backend {
            service_name = "api-service1"
            service_port = 80
          }
          path = "/service3"
        }
        path {
          backend {
            service_name = "api-service2"
            service_port = 80
          }
          path = "/graphq4"
        }
      }
    }
  }
}

resource "aws_security_group" "eks-alb" {
  name        = "eks-alb-public"
  description = "Security group allowing public traffic for the eks load balancer."
  vpc_id      = var.vpc_id

  egress {
    from_port   = 0
    to_port     = 0
    protocol    = "-1"
    cidr_blocks = ["0.0.0.0/0"]
  }

  tags = map(
     "Name", "terraform-eks-alb",
     "kubernetes.io/cluster/tf-eks-cluster", "owned"
  )
}

resource "aws_security_group_rule" "eks-alb-public-https" {
  description       = "Allow eks load balancer to communicate with public traffic securely."
  cidr_blocks       = ["0.0.0.0/0"]
  from_port         = 443
  protocol          = "tcp"
  security_group_id = aws_security_group.eks-alb.id
  to_port           = 443
  type              = "ingress"
}

resource "aws_security_group_rule" "eks-alb-public-http" {
  description       = "Allow eks load balancer to communicate with public traffic."
  cidr_blocks       = ["0.0.0.0/0"]
  from_port         = 80
  protocol          = "tcp"
  security_group_id = aws_security_group.eks-alb.id
  to_port           = 80
  type              = "ingress"
}

resource "aws_eks_cluster" "tf_eks_cluster" {
  name            = var.cluster_name
  role_arn        = aws_iam_role.tf-eks-cluster.arn

  vpc_config {
    security_group_ids      = [aws_security_group.tf-eks-cluster.id]
    subnet_ids              = var.app_subnet_ids
    endpoint_private_access = true
    endpoint_public_access  = false
  }

  depends_on = [
    aws_iam_role_policy_attachment.tf-eks-cluster-AmazonEKSClusterPolicy,
    aws_iam_role_policy_attachment.tf-eks-cluster-AmazonEKSServicePolicy,
  ]
}

# Setup for IAM role needed to setup an EKS cluster
resource "aws_iam_role" "tf-eks-cluster" {
  name = "tf-eks-cluster"

  assume_role_policy = <<POLICY
{
  "Version": "2012-10-17",
  "Statement": [
    {
      "Effect": "Allow",
      "Principal": {
        "Service": "eks.amazonaws.com"
      },
      "Action": "sts:AssumeRole"
    }
  ]
}
POLICY
}

resource "aws_iam_role_policy_attachment" "tf-eks-cluster-AmazonEKSClusterPolicy" {
  policy_arn = "arn:aws:iam::aws:policy/AmazonEKSClusterPolicy"
  role       = aws_iam_role.tf-eks-cluster.name
}

resource "aws_iam_role_policy_attachment" "tf-eks-cluster-AmazonEKSServicePolicy" {
  policy_arn = "arn:aws:iam::aws:policy/AmazonEKSServicePolicy"
  role       = aws_iam_role.tf-eks-cluster.name
}

########################################################################################
# Setup IAM role & instance profile for worker nodes

resource "aws_iam_role" "tf-eks-node" {
  name = "tf-eks-node"

  assume_role_policy = <<POLICY
{
  "Version": "2012-10-17",
  "Statement": [
    {
      "Effect": "Allow",
      "Principal": {
        "Service": "ec2.amazonaws.com"
      },
      "Action": "sts:AssumeRole"
    }
  ]
}
POLICY
}

resource "aws_iam_instance_profile" "tf-eks-node" {
  name = "tf-eks-node"
  role = aws_iam_role.tf-eks-node.name
}

resource "aws_iam_role_policy_attachment" "tf-eks-node-AmazonEKSWorkerNodePolicy" {
  policy_arn = "arn:aws:iam::aws:policy/AmazonEKSWorkerNodePolicy"
  role       = aws_iam_role.tf-eks-node.name
}

resource "aws_iam_role_policy_attachment" "tf-eks-node-AmazonEKS_CNI_Policy" {
  policy_arn = "arn:aws:iam::aws:policy/AmazonEKS_CNI_Policy"
  role       = aws_iam_role.tf-eks-node.name
}

resource "aws_iam_role_policy_attachment" "tf-eks-node-AmazonEC2ContainerRegistryReadOnly" {
  policy_arn = "arn:aws:iam::aws:policy/AmazonEC2ContainerRegistryReadOnly"
  role       = aws_iam_role.tf-eks-node.name
}

resource "aws_iam_role_policy_attachment" "tf-eks-node-AmazonEC2FullAccess" {
  policy_arn = "arn:aws:iam::aws:policy/AmazonEC2FullAccess"
  role       = aws_iam_role.tf-eks-node.name
}

resource "aws_iam_role_policy_attachment" "tf-eks-node-alb-ingress_policy" {
  policy_arn = aws_iam_policy.alb-ingress.arn
  role       = aws_iam_role.tf-eks-node.name
}

resource "aws_iam_policy" "alb-ingress" {    
  name   = "alb-ingress-policy"
  policy = file("${path.module}/alb_ingress_policy.json")
}

# generate KUBECONFIG as output to save in ~/.kube/config locally
# run 'terraform output eks_kubeconfig > config', then 'mv config ~/.kube/config' to use it with kubectl
locals {
  kubeconfig = <<KUBECONFIG


apiVersion: v1
clusters:
- cluster:
    server: ${aws_eks_cluster.tf_eks_cluster.endpoint}
    certificate-authority-data: ${aws_eks_cluster.tf_eks_cluster.certificate_authority.0.data}
  name: kubernetes
contexts:
- context:
    cluster: kubernetes
    user: aws
  name: aws
current-context: aws
kind: Config
preferences: {}
users:
- name: aws
  user:
    exec:
      apiVersion: client.authentication.k8s.io/v1alpha1
      command: aws-iam-authenticator
      args:
        - "token"
        - "-i"
        - "${var.cluster_name}"
KUBECONFIG
}

########################################################################################
# Setup AutoScaling Group for worker nodes

# Setup data source to get amazon-provided AMI for EKS nodes
data "aws_ami" "eks-worker" {
  filter {
    name   = "name"
    values = ["amazon-eks-node-v*"]
  }

  most_recent = true
  owners      = ["602401143452"] # Amazon EKS AMI Account ID
}

# Is provided in demo code, no idea what it's used for though! TODO: DELETE
# data "aws_region" "current" {}

# EKS currently documents this required userdata for EKS worker nodes to
# properly configure Kubernetes applications on the EC2 instance.
# We utilize a Terraform local here to simplify Base64 encode this
# information and write it into the AutoScaling Launch Configuration.
# More information: https://docs.aws.amazon.com/eks/latest/userguide/launch-workers.html
locals {
  tf-eks-node-userdata = <<USERDATA
#!/bin/bash
set -o xtrace
/etc/eks/bootstrap.sh --apiserver-endpoint '${aws_eks_cluster.tf_eks_cluster.endpoint}' --b64-cluster-ca '${aws_eks_cluster.tf_eks_cluster.certificate_authority.0.data}' '${var.cluster_name}'
USERDATA
}


resource "aws_launch_configuration" "tf_eks_cluster" {

  associate_public_ip_address = true

  iam_instance_profile        = aws_iam_instance_profile.tf-eks-node.name

  image_id                    = data.aws_ami.eks-worker.id

  instance_type               = var.instance_type

  name_prefix                 = "tf-eks-spot"

  security_groups             = [aws_security_group.tf-eks-node.id]

  user_data_base64            = base64encode(local.tf-eks-node-userdata)

  lifecycle {
    create_before_destroy = true
  }
}

resource "aws_lb_target_group" "tf_eks_cluster" {
  name = "tf-eks-cluster"
  port = 31742
  protocol = "HTTP"
  vpc_id = var.vpc_id
  target_type = "instance"
}

resource "aws_autoscaling_group" "tf_eks_cluster" {
  desired_capacity     = "2"
  launch_configuration = aws_launch_configuration.tf_eks_cluster.id
  max_size             = "3"
  min_size             = 1
  name                 = "tf-eks-cluster"
  vpc_zone_identifier  = var.app_subnet_ids
  target_group_arns    = [aws_lb_target_group.tf_eks_cluster.arn]

  tag {
    key                 = "Name"
    value               = "tf-eks-cluster"
    propagate_at_launch = true
  }

  tag {
    key                 = "kubernetes.io/cluster/${var.cluster_name}"
    value               = "owned"
    propagate_at_launch = true
  }
}

resource "aws_security_group" "tf-eks-cluster" {
    name        = "terraform-eks-cluster"
    description = "Cluster communication with worker nodes"
    vpc_id      = var.vpc_id

    egress {
        from_port   = 0
        to_port     = 0
        protocol    = "-1"
        cidr_blocks = ["0.0.0.0/0"]
    }

    tags = {
        Name = "terraform-eks"
    }
}

resource "aws_security_group" "tf-eks-node" {
    name        = "terraform-eks-node"
    description = "Security group for all nodes in the cluster"
    vpc_id      = var.vpc_id

    egress {
        from_port   = 0
        to_port     = 0
        protocol    = "-1"
        cidr_blocks = ["0.0.0.0/0"]
    }

    tags = {
        Name = "terraform-eks"
    }
}

# Allow inbound traffic from your local workstation external IP
# to the Kubernetes. You will need to replace A.B.C.D below with
# your real IP. Services like icanhazip.com can help you find this.
resource "aws_security_group_rule" "tf-eks-cluster-ingress-workstation-https" {
  cidr_blocks       = [var.accessing_computer_ip]
  description       = "Allow workstation to communicate with the cluster API Server"
  from_port         = 443
  protocol          = "tcp"
  security_group_id = aws_security_group.tf-eks-cluster.id
  to_port           = 443
  type              = "ingress"
}

########################################################################################
# Setup worker node security group

resource "aws_security_group_rule" "tf-eks-node-ingress-self" {
  description              = "Allow node to communicate with each other"
  from_port                = 0
  protocol                 = "-1"
  security_group_id        = aws_security_group.tf-eks-node.id
  source_security_group_id = aws_security_group.tf-eks-node.id
  to_port                  = 65535
  type                     = "ingress"
}

resource "aws_security_group_rule" "tf-eks-node-ingress-cluster" {
  description              = "Allow worker Kubelets and pods to receive communication from the cluster control plane"
  from_port                = 1025
  protocol                 = "tcp"
  security_group_id        = aws_security_group.tf-eks-node.id
  source_security_group_id = aws_security_group.tf-eks-cluster.id
  to_port                  = 65535
  type                     = "ingress"
}

# allow worker nodes to access EKS master
resource "aws_security_group_rule" "tf-eks-cluster-ingress-node-https" {
  description              = "Allow pods to communicate with the cluster API Server"
  from_port                = 443
  protocol                 = "tcp"
  security_group_id        = aws_security_group.tf-eks-node.id
  source_security_group_id = aws_security_group.tf-eks-cluster.id
  to_port                  = 443
  type                     = "ingress"
}

resource "aws_security_group_rule" "tf-eks-node-ingress-master" {
  description              = "Allow cluster control to receive communication from the worker Kubelets"
  from_port                = 443
  protocol                 = "tcp"
  security_group_id        = aws_security_group.tf-eks-cluster.id
  source_security_group_id = aws_security_group.tf-eks-node.id
  to_port                  = 443
  type                     = "ingress"
}

resource "aws_internet_gateway" "eks" {

  vpc_id = aws_vpc.eks.id

  tags = {
    Name = "internet_gateway"
  }
}


resource "aws_eip" "nat_gateway" {

  count = var.subnet_count

  vpc   = true
}

resource "aws_nat_gateway" "eks" {

  count = var.subnet_count

  allocation_id = aws_eip.nat_gateway.*.id[count.index]

  subnet_id = aws_subnet.gateway.*.id[count.index]

  tags = {
    Name = "nat_gateway"
  }

  depends_on = [aws_internet_gateway.eks]
}

resource "aws_route_table" "application" {

  count = var.subnet_count

  vpc_id = aws_vpc.eks.id

  route {

    cidr_block = "0.0.0.0/0"

    nat_gateway_id = aws_nat_gateway.eks.*.id[count.index]
  }

  tags = {
    Name = "eks_application"
  }
}

resource "aws_route_table" "vpn" {

  vpc_id = aws_vpc.eks.id

  tags = {
    Name = "eks_vpn"
  }
}

resource "aws_route_table" "gateway" {

  vpc_id = aws_vpc.eks.id

  route {

    cidr_block = "0.0.0.0/0"

    gateway_id = aws_internet_gateway.eks.id
  }

  tags = {
    Name = "eks_gateway"
  }
}

resource "aws_route_table_association" "application" {

  count = var.subnet_count

  subnet_id      = aws_subnet.application.*.id[count.index]

  route_table_id = aws_route_table.application.*.id[count.index]
}

resource "aws_route_table_association" "vpn" {
  count = var.subnet_count

  subnet_id      = aws_subnet.vpn.*.id[count.index]
  route_table_id = aws_route_table.vpn.id
}

resource "aws_route_table_association" "gateway" {
  count = var.subnet_count

  subnet_id      = aws_subnet.gateway.*.id[count.index]
  route_table_id = aws_route_table.gateway.id
}

data "aws_availability_zones" "available" {}

resource "aws_subnet" "gateway" {
  count = var.subnet_count
  availability_zone = data.aws_availability_zones.available.names[count.index]
  cidr_block        = "10.0.1${count.index}.0/24"
  vpc_id            = aws_vpc.eks.id
  map_public_ip_on_launch = true

  tags = {
    Name = "eks_gateway"
  }  
}

resource "aws_subnet" "application" {
  count = var.subnet_count
  availability_zone = data.aws_availability_zones.available.names[count.index]
  cidr_block        = "10.0.2${count.index}.0/24"
  vpc_id            = aws_vpc.eks.id
  map_public_ip_on_launch = true

  tags = map(
      "Name", "eks_application",
      "kubernetes.io/cluster/${var.cluster_name}", "shared"
    )
}

resource "aws_subnet" "vpn" {
  count = var.subnet_count
  availability_zone = data.aws_availability_zones.available.names[count.index]
  cidr_block        = "10.0.3${count.index}.0/24"
  vpc_id            = aws_vpc.eks.id

  tags = {
    Name = "eks_vpn"
  }  
}

resource "aws_vpc" "eks" {

  cidr_block = "10.0.0.0/16"

  enable_dns_hostnames = true

  enable_dns_support = true

  tags = map(
      "Name", "eks-vpc",
      "kubernetes.io/cluster/${var.cluster_name}", "shared"
    )
}

I had tried to create the Kubernetes deployment in a single, massive Terraform manifest. I needed to split the Kubernetes resources out into a separate Terraform manifest, which I applied only after updating the ~/.kube/config file.

The DNS errors were caused by that kubeconfig not being current for the new cluster.

Additionally, I needed to ensure that endpoint_private_access = true is set in the EKS cluster resource.
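
For reference, here is a minimal sketch of what the provider block in the separated, Kubernetes-only manifest could look like once ~/.kube/config has been refreshed. The config_path value and the aws eks update-kubeconfig command are assumptions for illustration, not part of my original setup:

# Hypothetical provider block for the separate, Kubernetes-only manifest.
# It relies on the local kubeconfig, refreshed for the new cluster with e.g.:
#   aws eks update-kubeconfig --name <cluster_name> --region <aws_region>
provider "kubernetes" {
  version          = "~> 1.9"
  load_config_file = true             # read credentials from the kubeconfig instead of an inline token
  config_path      = "~/.kube/config" # assumed default path; adjust if KUBECONFIG points elsewhere
}

With this split, the cluster is fully created and the kubeconfig refreshed before the second apply creates the aws-auth config map, the RBAC objects, and the ingress.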
