繁体   English   中英

Prometheus 无法从 Kubernetes 集群中的 Traefik 服务拉取数据

[英]Prometheus does not pull data from Traefik service in Kubernetes cluster

我正在使用 prometheus( quay.azk8s.cn/prometheus/prometheus:v2.15.2 ) 在 kubernetes 的 monitoring 命名空间中监控 traefik 2.1.6。现在我已让 traefik 公开 metrics,并且可以使用 curl 命令从 http://traefik-ip:8080/metrics 获取指标数据,但是 prometheus 不拉取数据。我已经在 kubernetes 的 kube-system 命名空间中为 traefik 服务的 yaml 添加了注解,这是 prometheus 的配置(StatefulSet):

{
  "kind": "StatefulSet",
  "apiVersion": "apps/v1beta2",
  "metadata": {
    "name": "prometheus-k8s",
    "namespace": "monitoring",
    "selfLink": "/apis/apps/v1beta2/namespaces/monitoring/statefulsets/prometheus-k8s",
    "uid": "4190d704-aa3b-40da-ab99-bac3cb10f186",
    "resourceVersion": "18281285",
    "generation": 7,
    "creationTimestamp": "2020-03-04T16:31:01Z",
    "labels": {
      "prometheus": "k8s"
    },
    "annotations": {
      "prometheus-operator-input-hash": "4895445337133709592"
    },
    "ownerReferences": [
      {
        "apiVersion": "monitoring.coreos.com/v1",
        "kind": "Prometheus",
        "name": "k8s",
        "uid": "ddf7e48d-f982-4881-9312-0d50466870a9",
        "controller": true,
        "blockOwnerDeletion": true
      }
    ]
  },
  "spec": {
    "replicas": 2,
    "selector": {
      "matchLabels": {
        "app": "prometheus",
        "prometheus": "k8s"
      }
    },
    "template": {
      "metadata": {
        "creationTimestamp": null,
        "labels": {
          "app": "prometheus",
          "prometheus": "k8s"
        }
      },
      "spec": {
        "volumes": [
          {
            "name": "config",
            "secret": {
              "secretName": "prometheus-k8s",
              "defaultMode": 420
            }
          },
          {
            "name": "tls-assets",
            "secret": {
              "secretName": "prometheus-k8s-tls-assets",
              "defaultMode": 420
            }
          },
          {
            "name": "config-out",
            "emptyDir": {}
          },
          {
            "name": "prometheus-k8s-rulefiles-0",
            "configMap": {
              "name": "prometheus-k8s-rulefiles-0",
              "defaultMode": 420
            }
          },
          {
            "name": "prometheus-k8s-db",
            "emptyDir": {}
          }
        ],
        "containers": [
          {
            "name": "prometheus",
            "image": "quay.azk8s.cn/prometheus/prometheus:v2.15.2",
            "args": [
              "--web.console.templates=/etc/prometheus/consoles",
              "--web.console.libraries=/etc/prometheus/console_libraries",
              "--config.file=/etc/prometheus/config_out/prometheus.env.yaml",
              "--storage.tsdb.path=/prometheus",
              "--storage.tsdb.retention.time=24h",
              "--web.enable-lifecycle",
              "--storage.tsdb.no-lockfile",
              "--web.route-prefix=/"
            ],
            "ports": [
              {
                "name": "web",
                "containerPort": 9090,
                "protocol": "TCP"
              }
            ],
            "resources": {
              "requests": {
                "memory": "400Mi"
              }
            },
            "volumeMounts": [
              {
                "name": "config-out",
                "readOnly": true,
                "mountPath": "/etc/prometheus/config_out"
              },
              {
                "name": "tls-assets",
                "readOnly": true,
                "mountPath": "/etc/prometheus/certs"
              },
              {
                "name": "prometheus-k8s-db",
                "mountPath": "/prometheus"
              },
              {
                "name": "prometheus-k8s-rulefiles-0",
                "mountPath": "/etc/prometheus/rules/prometheus-k8s-rulefiles-0"
              }
            ],
            "livenessProbe": {
              "httpGet": {
                "path": "/-/healthy",
                "port": "web",
                "scheme": "HTTP"
              },
              "timeoutSeconds": 3,
              "periodSeconds": 5,
              "successThreshold": 1,
              "failureThreshold": 6
            },
            "readinessProbe": {
              "httpGet": {
                "path": "/-/ready",
                "port": "web",
                "scheme": "HTTP"
              },
              "timeoutSeconds": 3,
              "periodSeconds": 5,
              "successThreshold": 1,
              "failureThreshold": 120
            },
            "terminationMessagePath": "/dev/termination-log",
            "terminationMessagePolicy": "FallbackToLogsOnError",
            "imagePullPolicy": "IfNotPresent"
          },
          {
            "name": "prometheus-config-reloader",
            "image": "quay.azk8s.cn/coreos/prometheus-config-reloader:v0.37.0",
            "command": [
              "/bin/prometheus-config-reloader"
            ],
            "args": [
              "--log-format=logfmt",
              "--reload-url=http://localhost:9090/-/reload",
              "--config-file=/etc/prometheus/config/prometheus.yaml.gz",
              "--config-envsubst-file=/etc/prometheus/config_out/prometheus.env.yaml"
            ],
            "env": [
              {
                "name": "POD_NAME",
                "valueFrom": {
                  "fieldRef": {
                    "apiVersion": "v1",
                    "fieldPath": "metadata.name"
                  }
                }
              }
            ],
            "resources": {
              "limits": {
                "cpu": "100m",
                "memory": "25Mi"
              },
              "requests": {
                "cpu": "100m",
                "memory": "25Mi"
              }
            },
            "volumeMounts": [
              {
                "name": "config",
                "mountPath": "/etc/prometheus/config"
              },
              {
                "name": "config-out",
                "mountPath": "/etc/prometheus/config_out"
              }
            ],
            "terminationMessagePath": "/dev/termination-log",
            "terminationMessagePolicy": "FallbackToLogsOnError",
            "imagePullPolicy": "IfNotPresent"
          },
          {
            "name": "rules-configmap-reloader",
            "image": "jimmidyson/configmap-reload:v0.3.0",
            "args": [
              "--webhook-url=http://localhost:9090/-/reload",
              "--volume-dir=/etc/prometheus/rules/prometheus-k8s-rulefiles-0"
            ],
            "resources": {
              "limits": {
                "cpu": "100m",
                "memory": "25Mi"
              },
              "requests": {
                "cpu": "100m",
                "memory": "25Mi"
              }
            },
            "volumeMounts": [
              {
                "name": "prometheus-k8s-rulefiles-0",
                "mountPath": "/etc/prometheus/rules/prometheus-k8s-rulefiles-0"
              }
            ],
            "terminationMessagePath": "/dev/termination-log",
            "terminationMessagePolicy": "FallbackToLogsOnError",
            "imagePullPolicy": "IfNotPresent"
          }
        ],
        "restartPolicy": "Always",
        "terminationGracePeriodSeconds": 600,
        "dnsPolicy": "ClusterFirst",
        "nodeSelector": {
          "kubernetes.io/os": "linux"
        },
        "serviceAccountName": "prometheus-k8s",
        "serviceAccount": "prometheus-k8s",
        "securityContext": {
          "runAsUser": 1000,
          "runAsNonRoot": true,
          "fsGroup": 2000
        },
        "schedulerName": "default-scheduler"
      }
    },
    "serviceName": "prometheus-operated",
    "podManagementPolicy": "Parallel",
    "updateStrategy": {
      "type": "RollingUpdate"
    },
    "revisionHistoryLimit": 10
  },
  "status": {
    "observedGeneration": 7,
    "replicas": 2,
    "readyReplicas": 2,
    "currentReplicas": 2,
    "updatedReplicas": 2,
    "currentRevision": "prometheus-k8s-6f76f69569",
    "updateRevision": "prometheus-k8s-6f76f69569",
    "collisionCount": 0
  }
}


这是 traefik 服务(Service)的配置:


    {
      "kind": "Service",
      "apiVersion": "v1",
      "metadata": {
        "name": "traefik",
        "namespace": "kube-system",
        "selfLink": "/api/v1/namespaces/kube-system/services/traefik",
        "uid": "b2695279-2467-4480-aab5-a720a43951c1",
        "resourceVersion": "18280221",
        "creationTimestamp": "2020-01-29T10:26:34Z",
        "annotations": {
          "kubectl.kubernetes.io/last-applied-configuration": "{\"apiVersion\":\"v1\",\"kind\":\"Service\",\"metadata\":{\"annotations\":{\"prometheus.io/port\":\"8080\",\"prometheus.io/scrape\":\"true\"},\"name\":\"traefik\",\"namespace\":\"kube-system\"},\"spec\":{\"ports\":[{\"name\":\"web\",\"port\":80},{\"name\":\"websecure\",\"port\":443},{\"name\":\"metrics\",\"port\":8080}],\"selector\":{\"app\":\"traefik\"}}}\n",
          "prometheus.io/port": "8080",
          "prometheus.io/scrape": "true"
        }
      },
      "spec": {
        "ports": [
          {
            "name": "web",
            "protocol": "TCP",
            "port": 80,
            "targetPort": 80
          },
          {
            "name": "websecure",
            "protocol": "TCP",
            "port": 443,
            "targetPort": 443
          },
          {
            "name": "metrics",
            "protocol": "TCP",
            "port": 8080,
            "targetPort": 8080
          }
        ],
        "selector": {
          "app": "traefik"
        },
        "clusterIP": "10.254.169.66",
        "type": "ClusterIP",
        "sessionAffinity": "None"
      },
      "status": {
        "loadBalancer": {}
      }
    }

我阅读了一些文档,其中提示我应该在 kubernetes(v1.15.2) 的 ConfigMap 中配置抓取(scrape)任务,如下所示:

apiVersion: v1
kind: ConfigMap
metadata:
  name: prometheus-config
  namespace: kube-ops
data:
  prometheus.yml: |
    global:
      scrape_interval: 30s
      scrape_timeout: 30s
    scrape_configs:
    - job_name: 'prometheus'
      static_configs:
      - targets: ['localhost:9090']
    - job_name: 'traefik'
      static_configs:
      - targets: ['traefik-ingress-service.kube-system.svc.cluster.local:8080']

我在我的 prometheus yaml 中添加了上述配置。我错过了什么吗? 我执行了以下步骤:

  • 公开 traefik 指标网址(成功)
  • 为我的 traefik 服务添加注解(成功)

但是没有收集指标数据,我在这个问题上卡了 2 天,我应该怎么做才能使它工作? 这是我的 prometheus 的服务发现仪表板:

在此处输入图片说明

但是当我从 prometheus 查询数据时,我一无所获。

http_requests_total{job="traefik"}

注意:在新版本的 traefik(v2.1.6)中,用于检查数据是否已被拉取的查询是:

traefik_entrypoint_requests_total{job="traefik"}

可以看到prometheus拉取数据成功。

暂无
暂无

声明:本站的技术帖子网页,遵循CC BY-SA 4.0协议,如果您需要转载,请注明本站网址或者原文地址。任何问题请咨询:yoyou2525@163.com.

 
粤ICP备18138465号  © 2020-2024 STACKOOM.COM