[英]Why Azure Load Balancer gets created in AKS even though I have used AppGateway as an Ingress Controller?
[英]What's the correct way to setup AKS cluster Static IP, Load Balancer and Ingress Controller?
我已经连续好几天尝试在 AKS 上配置集群了,但我一直在文档的各个部分、SO 上的各种问题、Medium 上的文章之间跳来跳去……结果却总是以失败告终。
目标是获得一个 static ip 和一个 dns,我可以使用它来将我的应用程序连接到部署在 AKS 上的服务器。
我已经通过 terraform 创建了基础设施,它由一个资源组组成,我在其中创建了一个公共 IP 和 AKS 集群,到目前为止一切顺利。
我先尝试了在创建集群时通过选项 http_application_routing_enabled = true
安装的入口 controller,但文档不建议将其用于生产环境(https://learn.microsoft.com/en-us/azure/aks/http-application-routing),所以我现在改用推荐的方式,通过 Helm 安装 ingress-nginx controller(https://learn.microsoft.com/en-us/azure/aks/ingress-basic?tabs=azure-cli)。
在 terraform 我是这样安装的
# Resource group that the AKS cluster below is placed in.
resource "azurerm_resource_group" "resource_group" {
  location = var.location
  name     = var.resource_group_name

  tags = {
    Team        = "DevOps"
    Environment = "Test"
  }
}
# AKS cluster with one autoscaling default node pool (1 to 3 nodes), kubenet
# networking and a standard-SKU load balancer.
resource "azurerm_kubernetes_cluster" "server_cluster" {
  name                = "server_cluster"
  resource_group_name = azurerm_resource_group.resource_group.name
  location            = azurerm_resource_group.resource_group.location
  kubernetes_version  = var.kubernetes_version
  dns_prefix          = "fixit"
  # sku_tier = "Paid"

  # The HTTP application routing add-on is turned off.
  # http_application_routing_enabled = true
  http_application_routing_enabled = false

  default_node_pool {
    name = "default"
    type = "VirtualMachineScaleSets"

    # vm_size = "standard_b2s_v5"
    # vm_size = "standard_e2bs_v5"
    vm_size = "standard_b4ms"

    # Autoscaling bounds; node_count is the initial size.
    enable_auto_scaling = true
    node_count          = 1
    min_count           = 1
    max_count           = 3

    enable_host_encryption = false
    # os_disk_size_gb = 30
    # enable_node_public_ip = true
  }

  # Credentials the cluster uses to manage Azure resources.
  service_principal {
    client_id     = var.sp_client_id
    client_secret = var.sp_client_secret
  }

  linux_profile {
    admin_username = "azureuser"

    ssh_key {
      key_data = var.ssh_key
    }
  }

  network_profile {
    network_plugin    = "kubenet"
    load_balancer_sku = "standard"
    # load_balancer_sku = "basic"
  }

  tags = {
    Environment = "Production"
  }
}
# Static, Standard-SKU public IP with the DNS label "fixit", created in the
# resource group named by var.resource_group_name.
resource "azurerm_public_ip" "public-ip" {
  name                = "fixit-public-ip"
  resource_group_name = var.resource_group_name
  location            = var.location

  sku               = "Standard"
  allocation_method = "Static"
  domain_name_label = "fixit"
}
# LoadBalancer service that requests var.public_ip_address and forwards
# TCP port 3000 to target port 80 on pods labelled name=cluster-ingress-svc.
resource "kubernetes_service" "cluster-ingress" {
  metadata {
    name = "cluster-ingress-svc"

    annotations = {
      # Resource group in which the load balancer should look for the public IP.
      "service.beta.kubernetes.io/azure-load-balancer-resource-group" = "fixit-resource-group"
      # Warning SyncLoadBalancerFailed 2m38s (x8 over 12m) service-controller Error syncing load balancer:
      # failed to ensure load balancer: findMatchedPIPByLoadBalancerIP: cannot find public IP with IP address 52.157.90.236
      # in resource group MC_fixit-resource-group_server_cluster_westeurope
      # "service.beta.kubernetes.io/azure-load-balancer-resource-group" = "MC_fixit-resource-group_server_cluster_westeurope"
      # kubernetes.io/ingress.class: addon-http-application-routing
    }
  }

  spec {
    # type = "Ingress"
    type             = "LoadBalancer"
    load_balancer_ip = var.public_ip_address

    selector = {
      name = "cluster-ingress-svc"
    }

    port {
      name        = "cluster-port"
      protocol    = "TCP"
      port        = 3000
      target_port = "80"
    }
  }
}
# Installs the ingress-nginx controller chart into the "default" namespace and
# asks its LoadBalancer service to use var.public_ip_address.
resource "helm_release" "nginx" {
  name       = "ingress-nginx"
  namespace  = "default"
  repository = "https://kubernetes.github.io/ingress-nginx"
  chart      = "ingress-nginx"

  set {
    name  = "rbac.create"
    value = "false"
  }

  set {
    name  = "controller.service.externalTrafficPolicy"
    value = "Local"
  }

  set {
    name  = "controller.service.loadBalancerIP"
    value = var.public_ip_address
  }

  # NOTE(review): the dots in this annotation key are not escaped, unlike the
  # health-probe key below; the `kubectl describe` output later in the post
  # shows it ending up as `service: map[beta:map[kubernetes:map[...]]]`.
  set {
    name  = "controller.service.annotations.service.beta.kubernetes.io/azure-load-balancer-internal"
    value = "true"
  }

  # --set controller.service.annotations."service\.beta\.kubernetes\.io/azure-load-balancer-health-probe-request-path"=/healthz
  set {
    name  = "controller.service.annotations.service\\.beta\\.kubernetes\\.io/azure-load-balancer-health-probe-request-path"
    value = "/healthz"
  }
}
但安装失败并显示来自 terraform 的消息
Warning: Helm release "ingress-nginx" was created but has a failed status. Use the `helm` command to investigate the error, correct it, then run Terraform again.
│
│ with module.ingress_controller.helm_release.nginx,
│ on modules/ingress_controller/controller.tf line 2, in resource "helm_release" "nginx":
│ 2: resource "helm_release" "nginx" {
│
╵
╷
│ Error: timed out waiting for the condition
│
│ with module.ingress_controller.helm_release.nginx,
│ on modules/ingress_controller/controller.tf line 2, in resource "helm_release" "nginx":
│ 2: resource "helm_release" "nginx" {
controller 服务的 describe 输出如下:
vincenzocalia@vincenzos-MacBook-Air helm_charts % kubectl describe svc ingress-nginx-controller
Name: ingress-nginx-controller
Namespace: default
Labels: app.kubernetes.io/component=controller
app.kubernetes.io/instance=ingress-nginx
app.kubernetes.io/managed-by=Helm
app.kubernetes.io/name=ingress-nginx
app.kubernetes.io/part-of=ingress-nginx
app.kubernetes.io/version=1.5.1
helm.sh/chart=ingress-nginx-4.4.2
Annotations: meta.helm.sh/release-name: ingress-nginx
meta.helm.sh/release-namespace: default
service: map[beta:map[kubernetes:map[io/azure-load-balancer-internal:true]]]
service.beta.kubernetes.io/azure-load-balancer-health-probe-request-path: /healthz
Selector: app.kubernetes.io/component=controller,app.kubernetes.io/instance=ingress-nginx,app.kubernetes.io/name=ingress-nginx
Type: LoadBalancer
IP Family Policy: SingleStack
IP Families: IPv4
IP: 10.0.173.243
IPs: 10.0.173.243
IP: 52.157.90.236
Port: http 80/TCP
TargetPort: http/TCP
NodePort: http 31709/TCP
Endpoints:
Port: https 443/TCP
TargetPort: https/TCP
NodePort: https 30045/TCP
Endpoints:
Session Affinity: None
External Traffic Policy: Local
HealthCheck NodePort: 32500
Events:
Type Reason Age From Message
---- ------ ---- ---- -------
Normal EnsuringLoadBalancer 32s (x5 over 108s) service-controller Ensuring load balancer
Warning SyncLoadBalancerFailed 31s (x5 over 107s) service-controller Error syncing load balancer: failed to ensure load balancer: findMatchedPIPByLoadBalancerIP: cannot find public IP with IP address 52.157.90.236 in resource group mc_fixit-resource-group_server_cluster_westeurope
vincenzocalia@vincenzos-MacBook-Air helm_charts % az aks show --resource-group fixit-resource-group --name server_cluster --query nodeResourceGroup -o tsv
MC_fixit-resource-group_server_cluster_westeurope
为什么它会在MC_fixit-resource-group_server_cluster_westeurope
资源组中查找,而不是在我为集群、公共 IP 和负载均衡器创建的fixit-resource-group
中查找?
如果我把 controller 的负载均衡器 IP 改成 MC_fixit-resource-group_server_cluster_westeurope
资源组中的公共 IP,terraform 仍然会输出相同的错误,但 controller 的输出显示该 IP 和负载均衡器已被正确分配
set {
name = "controller.service.loadBalancerIP"
value = "20.73.192.77" #var.public_ip_address
}
vincenzocalia@vincenzos-MacBook-Air helm_charts % kubectl get svc
NAME TYPE CLUSTER-IP EXTERNAL-IP PORT(S) AGE
cluster-ingress-svc LoadBalancer 10.0.110.114 52.157.90.236 3000:31863/TCP 104m
ingress-nginx-controller LoadBalancer 10.0.106.201 20.73.192.77 80:30714/TCP,443:32737/TCP 41m
ingress-nginx-controller-admission ClusterIP 10.0.23.188 <none> 443/TCP 41m
kubernetes ClusterIP 10.0.0.1 <none> 443/TCP 122m
vincenzocalia@vincenzos-MacBook-Air helm_charts % kubectl describe svc ingress-nginx-controller
Name: ingress-nginx-controller
Namespace: default
Labels: app.kubernetes.io/component=controller
app.kubernetes.io/instance=ingress-nginx
app.kubernetes.io/managed-by=Helm
app.kubernetes.io/name=ingress-nginx
app.kubernetes.io/part-of=ingress-nginx
app.kubernetes.io/version=1.5.1
helm.sh/chart=ingress-nginx-4.4.2
Annotations: meta.helm.sh/release-name: ingress-nginx
meta.helm.sh/release-namespace: default
service: map[beta:map[kubernetes:map[io/azure-load-balancer-internal:true]]]
service.beta.kubernetes.io/azure-load-balancer-health-probe-request-path: /healthz
Selector: app.kubernetes.io/component=controller,app.kubernetes.io/instance=ingress-nginx,app.kubernetes.io/name=ingress-nginx
Type: LoadBalancer
IP Family Policy: SingleStack
IP Families: IPv4
IP: 10.0.106.201
IPs: 10.0.106.201
IP: 20.73.192.77
LoadBalancer Ingress: 20.73.192.77
Port: http 80/TCP
TargetPort: http/TCP
NodePort: http 30714/TCP
Endpoints:
Port: https 443/TCP
TargetPort: https/TCP
NodePort: https 32737/TCP
Endpoints:
Session Affinity: None
External Traffic Policy: Local
HealthCheck NodePort: 32538
Events:
Type Reason Age From Message
---- ------ ---- ---- -------
Normal EnsuringLoadBalancer 39m (x2 over 41m) service-controller Ensuring load balancer
Normal EnsuredLoadBalancer 39m (x2 over 41m) service-controller Ensured load balancer
vincenzocalia@vincenzos-MacBook-Air helm_charts %
在这里阅读https://learn.microsoft.com/en-us/azure/aks/faq#why-are-two-resource-groups-created-with-aks
为了启用此体系结构,每个 AKS 部署跨越两个资源组:第一个资源组由您创建,该组仅包含 Kubernetes 服务资源。 AKS 资源提供程序在部署期间自动创建第二个资源组。 第二个资源组的示例是 MC_myResourceGroup_myAKSCluster_eastus。 有关如何指定此第二个资源组的名称的信息,请参阅下一节。 第二个资源组称为节点资源组,包含与集群关联的所有基础结构资源。 这些资源包括 Kubernetes 节点虚拟机、虚拟网络和存储。 默认情况下,节点资源组的名称类似于 MC_myResourceGroup_myAKSCluster_eastus。 每当删除集群时,AKS 都会自动删除节点资源组,因此它应该只用于与集群共享生命周期的资源。
根据我创建的资源类型,我应该传入第一个还是第二个资源组? 例如, kubernetes_service
需要第一个 rg,而azurerm_public_ip
需要第二个 rg?
我在这里错过了什么? 请像我 5 岁时那样解释,因为我现在感觉……
非常感谢
终于找到问题所在了。
实际上,需要在node resource group
中创建Public IP
,因为入口loadBalancerIP
(分配给Public IP
地址的 loadBalancerIP)将在node resource group
中查找它;因此,如果您在resource group
中创建它,就会失败并出现我遇到的那个错误。
节点资源组的名称是在创建集群时自动分配的,例如 MC_myResourceGroup_myAKSCluster_eastus
,但您可以使用参数node_resource_group = var.node_resource_group_name
自行指定该名称。
此外,公共 IP sku
"Standard"
(需要指定)或"Basic"
(默认),以及集群load_balancer_sku
"standard"
或"basic"
(此处没有默认值,需要指定)必须匹配。
我还把 Public IP 放进了集群模块,这样它就可以依赖集群,避免在集群之前被创建并因为node resource group
尚未创建而失败;我没能在main.tf
文件中正确设置这一依赖关系。
所以现在的工作配置是:
# Terraform settings: minimum CLI version and the pinned azurerm provider.
terraform {
  required_version = ">=1.1.0"

  required_providers {
    azurerm = {
      version = "~> 3.0.2"
      source  = "hashicorp/azurerm"
    }
  }
}
# Azure provider, authenticated with a service principal.
provider "azurerm" {
  tenant_id       = var.azure_subscription_tenant_id
  subscription_id = var.azure_subscription_id
  client_id       = var.service_principal_appid
  client_secret   = var.service_principal_password

  features {
    resource_group {
      # Allow a resource group to be deleted even if it still contains resources.
      prevent_deletion_if_contains_resources = false
    }
  }
}
# Kubernetes provider, configured from the credentials exported by the cluster
# module. The certificate outputs are base64-encoded and are decoded here.
# Plain expressions replace the deprecated interpolation-only "${...}" form.
provider "kubernetes" {
  host                   = module.cluster.host
  client_certificate     = base64decode(module.cluster.client_certificate)
  client_key             = base64decode(module.cluster.client_key)
  cluster_ca_certificate = base64decode(module.cluster.cluster_ca_certificate)
}
# Helm provider, pointed at the same cluster as the kubernetes provider.
# The certificate outputs of the cluster module are base64-encoded and are
# decoded here. Plain expressions replace the deprecated interpolation-only
# "${...}" form.
provider "helm" {
  kubernetes {
    host                   = module.cluster.host
    client_certificate     = base64decode(module.cluster.client_certificate)
    client_key             = base64decode(module.cluster.client_key)
    cluster_ca_certificate = base64decode(module.cluster.cluster_ca_certificate)
  }
}
# Cluster module: passes in the location, sizing, resource group names,
# Kubernetes version, SSH key and service principal credentials.
module "cluster" {
  source = "./modules/cluster"

  # Placement
  location                 = var.location
  resource_group_name      = var.resource_group_name
  node_resource_group_name = var.node_resource_group_name

  # Cluster sizing and version
  vm_size            = var.vm_size
  kubernetes_version = var.kubernetes_version

  # Access
  ssh_key          = var.ssh_key
  sp_client_id     = var.service_principal_appid
  sp_client_secret = var.service_principal_password
}
# Ingress controller module: receives the public IP address exported by the
# cluster module and is ordered after it.
module "ingress-controller" {
  source = "./modules/ingress-controller"

  public_ip_address = module.cluster.public_ip_address

  # Wait for the cluster module's public IP output before installing.
  depends_on = [
    module.cluster.public_ip_address,
  ]
}
# Resource group that the AKS cluster resource is placed in.
resource "azurerm_resource_group" "resource_group" {
  location = var.location
  name     = var.resource_group_name

  tags = {
    Team        = "DevOps"
    Environment = "test"
  }
}
# AKS cluster with one autoscaling default node pool (1 to 3 nodes), kubenet
# networking, a basic-SKU load balancer and an explicitly named node resource
# group.
resource "azurerm_kubernetes_cluster" "server_cluster" {
  name = "server_cluster"

  ### Resource group to use for the cluster
  resource_group_name = azurerm_resource_group.resource_group.name
  location            = azurerm_resource_group.resource_group.location

  ### Name of the cluster "node" resource group; if unset it is named automatically
  node_resource_group = var.node_resource_group_name

  kubernetes_version = var.kubernetes_version
  dns_prefix         = "fixit"
  # sku_tier = "Paid"

  # The HTTP application routing add-on is turned off.
  http_application_routing_enabled = false

  default_node_pool {
    name    = "default"
    type    = "VirtualMachineScaleSets"
    vm_size = var.vm_size

    # Autoscaling bounds; node_count is the initial size.
    enable_auto_scaling = true
    node_count          = 1
    min_count           = 1
    max_count           = 3

    enable_host_encryption = false
    # os_disk_size_gb = 30
  }

  # Credentials the cluster uses to manage Azure resources.
  service_principal {
    client_id     = var.sp_client_id
    client_secret = var.sp_client_secret
  }

  linux_profile {
    admin_username = "azureuser"

    ssh_key {
      key_data = var.ssh_key
    }
  }

  network_profile {
    network_plugin    = "kubenet"
    load_balancer_sku = "basic"
  }

  tags = {
    Environment = "Production"
  }

  depends_on = [
    azurerm_resource_group.resource_group,
  ]
}
# Static public IP for the ingress controller's LoadBalancer service.
# It is created in the cluster's *node* resource group, because that is where
# the load balancer looks up the requested loadBalancerIP.
resource "azurerm_public_ip" "public-ip" {
  name     = "fixit-public-ip"
  location = var.location

  # Read the node resource group from the cluster resource rather than from
  # the variable. The reference gives Terraform an implicit dependency on the
  # cluster (so no explicit depends_on is needed), and it stays correct when
  # var.node_resource_group_name is unset and AKS generates the name.
  # resource_group_name = var.resource_group_name
  resource_group_name = azurerm_kubernetes_cluster.server_cluster.node_resource_group

  allocation_method = "Static"
  domain_name_label = "fixit"

  # Must match the cluster's network_profile.load_balancer_sku ("basic").
  # Stated explicitly so the pairing does not rely on the provider default.
  # sku = "Standard"
  sku = "Basic"
}
# Installs the ingress-nginx controller chart into the "default" namespace and
# binds its LoadBalancer service to the static public IP passed in as
# var.public_ip_address.
resource "helm_release" "nginx" {
  name      = "ingress-nginx"
  namespace = "default"

  # Use the chart repository URL directly so the install does not depend on a
  # repo alias that was added to the local Helm client beforehand.
  repository = "https://kubernetes.github.io/ingress-nginx"
  chart      = "ingress-nginx"

  set {
    name  = "controller.service.externalTrafficPolicy"
    value = "Local"
  }

  set {
    name  = "controller.service.loadBalancerIP"
    value = var.public_ip_address
  }

  # The azure-load-balancer-internal annotation is intentionally not set:
  # with unescaped dots it was only ever rendered as a meaningless nested
  # `service: map[...]` annotation, and written correctly it would request an
  # internal load balancer, which contradicts binding a public IP.

  # Dots inside the annotation key must be escaped, otherwise Helm treats
  # every dot as a nesting level and the annotation is never applied.
  set {
    name  = "controller.service.annotations.service\\.beta\\.kubernetes\\.io/azure-load-balancer-health-probe-request-path"
    value = "/healthz"
  }
}
# Ingress for the ingress-nginx controller. Paths are regular expressions
# (use-regex) and the capture groups are fed into rewrite-target.
apiVersion: networking.k8s.io/v1
kind: Ingress
metadata:
  name: ingress-service
  # namespace: default
  annotations:
    nginx.ingress.kubernetes.io/ssl-redirect: "false"
    nginx.ingress.kubernetes.io/use-regex: "true"
    nginx.ingress.kubernetes.io/rewrite-target: /$2$3$4
spec:
  ingressClassName: nginx
  rules:
    # - host: fixit.westeurope.cloudapp.azure.com #dns from Azure PublicIP
    ### Node.js server
    - http:
        paths:
          # Regex paths use ImplementationSpecific: Prefix is defined for
          # literal path prefixes, and newer ingress-nginx releases can
          # reject regex characters in Prefix/Exact paths.
          - path: /(/|$)(.*)
            pathType: ImplementationSpecific
            backend:
              service:
                name: server-clusterip-service
                port:
                  number: 80
    - http:
        paths:
          - path: /server(/|$)(.*)
            pathType: ImplementationSpecific
            backend:
              service:
                name: server-clusterip-service
                port:
                  number: 80
...
other services omitted
希望这能帮助其他在正确配置这套环境时遇到困难的人。 干杯。
声明:本站的技术帖子网页,遵循CC BY-SA 4.0协议,如果您需要转载,请注明本站网址或者原文地址。任何问题请咨询:yoyou2525@163.com.