Control plane components consist API Server, Controller Manager, Scheduler, ETCD, and CoreDNS. These components run as a pod within the control plane node. As control plane nodes orchestrate the functioning of the entire Kubernetes cluster, it is necessary to monitor the KPI metrics of these components that can effect the overall health of the Kubernetes cluster.
-
To monitor the control plane nodes, you must configure the targets of these components within Prometheus. Each pod has metrics exposed in prometheus format in the /metrics endpoint at the corresponding port.
- As the control plane metrics are monitored through Prometheus, you must configure the metrics endpoint of the respective control plane components in Prometheus.
API Servers
Port: 6443
Metrics Endpoint: https://%3CMaster_Node_IP%3E:6443/metrics
- API Server Metrics
- Request Rate[5m]
- Success Request Rate[5m]
- Error Request Rate[5m]
- GO Metrics
- Go Routines
- HTTP Request Metrics
- Request Rate[5m]
- Success Request Rate[5m]
- Error Request Rate[5m]
- Work Queue Metrics
- Work Queue Adds[5m]
- Work Queue Depth[5m]
- Work Queue Duration[5m]
- job_name: 'apiserver'
scheme: https
authorization:
type: Bearer
credentials_file: /var/run/secrets/kubernetes.io/serviceaccount/token
tls_config:
ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
insecure_skip_verify: true
kubernetes_sd_configs:
- role: pod
relabel_configs:
- separator: ;
regex: (.*)
target_label: __address__
replacement: kubernetes.default.svc:443
action: replace
- action: keep
regex: kube-system;kube-apiserver
source_labels:
- __meta_kubernetes_namespace
- __meta_kubernetes_pod_label_component
- source_labels: [__meta_kubernetes_namespace]
action: replace
target_label: namespace
- source_labels: [__meta_kubernetes_pod_name]
action: replace
target_label: pod
Controller Manager
Port: 10257
Metrics Endpoint: https://<Master_Node_IP>:10257/metrics
- GO Metrics
- Go Routines
- HTTP Request Metrics
- Request Rate[5m]
- Success Request Rate[5m]
- Error Request Rate[5m]
- Work Queue Metrics
- Work Queue Adds[5m]
- Work Queue Depth[5m]
- Work Queue Duration[5m]
- job_name: serviceMonitor/default/prometheus-kube-prometheus-kube-controller-manager/0
honor_timestamps: true
scrape_interval: 30s
scrape_timeout: 10s
metrics_path: /metrics
scheme: https
authorization:
type: Bearer
credentials_file: /var/run/secrets/kubernetes.io/serviceaccount/token
tls_config:
ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
insecure_skip_verify: true
follow_redirects: true
enable_http2: true
relabel_configs:
- source_labels: [job]
separator: ;
regex: (.*)
target_label: __tmp_prometheus_job_name
replacement: $1
action: replace
- source_labels: [__meta_kubernetes_service_label_app, __meta_kubernetes_service_labelpresent_app]
separator: ;
regex: (kube-prometheus-stack-kube-controller-manager);true
replacement: $1
action: keep
- source_labels: [__meta_kubernetes_service_label_release, __meta_kubernetes_service_labelpresent_release]
separator: ;
regex: (prometheus);true
replacement: $1
action: keep
- source_labels: [__meta_kubernetes_endpoint_port_name]
separator: ;
regex: http-metrics
replacement: $1
action: keep
- source_labels: [__meta_kubernetes_endpoint_address_target_kind, __meta_kubernetes_endpoint_address_target_name]
separator: ;
regex: Node;(.*)
target_label: node
replacement: ${1}
action: replace
- source_labels: [__meta_kubernetes_endpoint_address_target_kind, __meta_kubernetes_endpoint_address_target_name]
separator: ;
regex: Pod;(.*)
target_label: pod
replacement: ${1}
action: replace
- source_labels: [__meta_kubernetes_namespace]
separator: ;
regex: (.*)
target_label: namespace
replacement: $1
action: replace
- source_labels: [__meta_kubernetes_service_name]
separator: ;
regex: (.*)
target_label: service
replacement: $1
action: replace
- source_labels: [__meta_kubernetes_pod_name]
separator: ;
regex: (.*)
target_label: pod
replacement: $1
action: replace
- source_labels: [__meta_kubernetes_pod_container_name]
separator: ;
regex: (.*)
target_label: container
replacement: $1
action: replace
- source_labels: [__meta_kubernetes_pod_phase]
separator: ;
regex: (Failed|Succeeded)
replacement: $1
action: drop
- source_labels: [__meta_kubernetes_service_name]
separator: ;
regex: (.*)
target_label: job
replacement: ${1}
action: replace
- source_labels: [__meta_kubernetes_service_label_jobLabel]
separator: ;
regex: (.+)
target_label: job
replacement: ${1}
action: replace
- separator: ;
regex: (.*)
target_label: endpoint
replacement: http-metrics
action: replace
- source_labels: [__address__]
separator: ;
regex: (.*)
modulus: 1
target_label: __tmp_hash
replacement: $1
action: hashmod
- source_labels: [__tmp_hash]
separator: ;
regex: "0"
replacement: $1
action: keep
kubernetes_sd_configs:
- role: endpoints
kubeconfig_file: ""
follow_redirects: true
enable_http2: true
namespaces:
own_namespace: false
names:
- kube-system
Scheduler
Port: 10259
Metrics Endpoint: https://<Master_Node_IP>:10259/metrics
- GO Metrics
- Go Routines
- HTTP Request Metrics
- Request Rate[5m]
- Success Request Rate[5m]
- Error Request Rate[5m]
- Scheduler Metrics
- Total Incoming Pods
- Pod Preemption Attempts[5m]
- Pod Preemption Victims[5m]
- Work Queue Metrics
- Work Queue Adds[5m]
- Work Queue Depth[5m]
- Work Queue Duration[5m]
- job_name: serviceMonitor/default/prometheus-kube-prometheus-kube-scheduler/0
honor_timestamps: true
scrape_interval: 30s
scrape_timeout: 10s
metrics_path: /metrics
scheme: http
authorization:
type: Bearer
credentials_file: /var/run/secrets/kubernetes.io/serviceaccount/token
follow_redirects: true
enable_http2: true
relabel_configs:
- source_labels: [job]
separator: ;
regex: (.*)
target_label: __tmp_prometheus_job_name
replacement: $1
action: replace
- source_labels: [__meta_kubernetes_service_label_app, __meta_kubernetes_service_labelpresent_app]
separator: ;
regex: (kube-prometheus-stack-kube-scheduler);true
replacement: $1
action: keep
- source_labels: [__meta_kubernetes_service_label_release, __meta_kubernetes_service_labelpresent_release]
separator: ;
regex: (prometheus);true
replacement: $1
action: keep
- source_labels: [__meta_kubernetes_endpoint_port_name]
separator: ;
regex: http-metrics
replacement: $1
action: keep
- source_labels: [__meta_kubernetes_endpoint_address_target_kind, __meta_kubernetes_endpoint_address_target_name]
separator: ;
regex: Node;(.*)
target_label: node
replacement: ${1}
action: replace
- source_labels: [__meta_kubernetes_endpoint_address_target_kind, __meta_kubernetes_endpoint_address_target_name]
separator: ;
regex: Pod;(.*)
target_label: pod
replacement: ${1}
action: replace
- source_labels: [__meta_kubernetes_namespace]
separator: ;
regex: (.*)
target_label: namespace
replacement: $1
action: replace
- source_labels: [__meta_kubernetes_service_name]
separator: ;
regex: (.*)
target_label: service
replacement: $1
action: replace
- source_labels: [__meta_kubernetes_pod_name]
separator: ;
regex: (.*)
target_label: pod
replacement: $1
action: replace
- source_labels: [__meta_kubernetes_pod_container_name]
separator: ;
regex: (.*)
target_label: container
replacement: $1
action: replace
- source_labels: [__meta_kubernetes_pod_phase]
separator: ;
regex: (Failed|Succeeded)
replacement: $1
action: drop
- source_labels: [__meta_kubernetes_service_name]
separator: ;
regex: (.*)
target_label: job
replacement: ${1}
action: replace
- source_labels: [__meta_kubernetes_service_label_jobLabel]
separator: ;
regex: (.+)
target_label: job
replacement: ${1}
action: replace
- separator: ;
regex: (.*)
target_label: endpoint
replacement: http-metrics
action: replace
- source_labels: [__address__]
separator: ;
regex: (.*)
modulus: 1
target_label: __tmp_hash
replacement: $1
action: hashmod
- source_labels: [__tmp_hash]
separator: ;
regex: "0"
replacement: $1
action: keep
kubernetes_sd_configs:
- role: endpoints
kubeconfig_file: ""
follow_redirects: true
enable_http2: true
namespaces:
own_namespace: false
names:
- kube-system
ETCD
Port: 2381
Metrics Endpoint: https://<Master_Node_IP>:2381/metrics
- ETCD Metrics
- Has Leader
- Total Leader Changes[5m]
- Server Proposals Committed[5m]
- Server Proposals Applied[5m]
- Server Proposals Pending
- Server Proposals Failed WAL
- Latency(Seconds)[5m]
- Commit Latency(Seconds)[5m]
- Round Trip Latency(Seconds)[5m]
- GO Metrics
- Go Routines
- job_name: serviceMonitor/default/prometheus-kube-prometheus-kube-etcd/0
honor_timestamps: true
scrape_interval: 30s
scrape_timeout: 10s
metrics_path: /metrics
scheme: http
authorization:
type: Bearer
credentials_file: /var/run/secrets/kubernetes.io/serviceaccount/token
follow_redirects: true
enable_http2: true
relabel_configs:
- source_labels: [job]
separator: ;
regex: (.*)
target_label: __tmp_prometheus_job_name
replacement: $1
action: replace
- source_labels: [__meta_kubernetes_service_label_app, __meta_kubernetes_service_labelpresent_app]
separator: ;
regex: (kube-prometheus-stack-kube-etcd);true
replacement: $1
action: keep
- source_labels: [__meta_kubernetes_service_label_release, __meta_kubernetes_service_labelpresent_release]
separator: ;
regex: (prometheus);true
replacement: $1
action: keep
- source_labels: [__meta_kubernetes_endpoint_port_name]
separator: ;
regex: http-metrics
replacement: $1
action: keep
- source_labels: [__meta_kubernetes_endpoint_address_target_kind, __meta_kubernetes_endpoint_address_target_name]
separator: ;
regex: Node;(.*)
target_label: node
replacement: ${1}
action: replace
- source_labels: [__meta_kubernetes_endpoint_address_target_kind, __meta_kubernetes_endpoint_address_target_name]
separator: ;
regex: Pod;(.*)
target_label: pod
replacement: ${1}
action: replace
- source_labels: [__meta_kubernetes_namespace]
separator: ;
regex: (.*)
target_label: namespace
replacement: $1
action: replace
- source_labels: [__meta_kubernetes_service_name]
separator: ;
regex: (.*)
target_label: service
replacement: $1
action: replace
- source_labels: [__meta_kubernetes_pod_name]
separator: ;
regex: (.*)
target_label: pod
replacement: $1
action: replace
- source_labels: [__meta_kubernetes_pod_container_name]
separator: ;
regex: (.*)
target_label: container
replacement: $1
action: replace
- source_labels: [__meta_kubernetes_pod_phase]
separator: ;
regex: (Failed|Succeeded)
replacement: $1
action: drop
- source_labels: [__meta_kubernetes_service_name]
separator: ;
regex: (.*)
target_label: job
replacement: ${1}
action: replace
- source_labels: [__meta_kubernetes_service_label_jobLabel]
separator: ;
regex: (.+)
target_label: job
replacement: ${1}
action: replace
- separator: ;
regex: (.*)
target_label: endpoint
replacement: http-metrics
action: replace
- source_labels: [__address__]
separator: ;
regex: (.*)
modulus: 1
target_label: __tmp_hash
replacement: $1
action: hashmod
- source_labels: [__tmp_hash]
separator: ;
regex: "0"
replacement: $1
action: keep
kubernetes_sd_configs:
- role: endpoints
kubeconfig_file: ""
follow_redirects: true
enable_http2: true
namespaces:
own_namespace: false
names:
- kube-system
CoreDNS
Port: 2381
Metrics Endpoint: https://<Master_Node_IP>:2381/metrics
- GO Metrics
- Go Routines
- CoreDNS Metrics
- Total DNS Responses[5m]
- Total DNS Requests[5m]
- DNS Request Duration[5m]
- Cache hits[5m]
- job_name: serviceMonitor/default/prometheus-kube-prometheus-coredns/0
honor_timestamps: true
scrape_interval: 30s
scrape_timeout: 10s
metrics_path: /metrics
scheme: http
authorization:
type: Bearer
credentials_file: /var/run/secrets/kubernetes.io/serviceaccount/token
follow_redirects: true
enable_http2: true
relabel_configs:
- source_labels: [job]
separator: ;
regex: (.*)
target_label: __tmp_prometheus_job_name
replacement: $1
action: replace
- source_labels: [__meta_kubernetes_service_label_app, __meta_kubernetes_service_labelpresent_app]
separator: ;
regex: (kube-prometheus-stack-coredns);true
replacement: $1
action: keep
- source_labels: [__meta_kubernetes_service_label_release, __meta_kubernetes_service_labelpresent_release]
separator: ;
regex: (prometheus);true
replacement: $1
action: keep
- source_labels: [__meta_kubernetes_endpoint_port_name]
separator: ;
regex: http-metrics
replacement: $1
action: keep
- source_labels: [__meta_kubernetes_endpoint_address_target_kind, __meta_kubernetes_endpoint_address_target_name]
separator: ;
regex: Node;(.*)
target_label: node
replacement: ${1}
action: replace
- source_labels: [__meta_kubernetes_endpoint_address_target_kind, __meta_kubernetes_endpoint_address_target_name]
separator: ;
regex: Pod;(.*)
target_label: pod
replacement: ${1}
action: replace
- source_labels: [__meta_kubernetes_namespace]
separator: ;
regex: (.*)
target_label: namespace
replacement: $1
action: replace
- source_labels: [__meta_kubernetes_service_name]
separator: ;
regex: (.*)
target_label: service
replacement: $1
action: replace
- source_labels: [__meta_kubernetes_pod_name]
separator: ;
regex: (.*)
target_label: pod
replacement: $1
action: replace
- source_labels: [__meta_kubernetes_pod_container_name]
separator: ;
regex: (.*)
target_label: container
replacement: $1
action: replace
- source_labels: [__meta_kubernetes_pod_phase]
separator: ;
regex: (Failed|Succeeded)
replacement: $1
action: drop
- source_labels: [__meta_kubernetes_service_name]
separator: ;
regex: (.*)
target_label: job
replacement: ${1}
action: replace
- source_labels: [__meta_kubernetes_service_label_jobLabel]
separator: ;
regex: (.+)
target_label: job
replacement: ${1}
action: replace
- separator: ;
regex: (.*)
target_label: endpoint
replacement: http-metrics
action: replace
- source_labels: [__address__]
separator: ;
regex: (.*)
modulus: 1
target_label: __tmp_hash
replacement: $1
action: hashmod
- source_labels: [__tmp_hash]
separator: ;
regex: "0"
replacement: $1
action: keep
kubernetes_sd_configs:
- role: endpoints
kubeconfig_file: ""
follow_redirects: true
enable_http2: true
namespaces:
own_namespace: false
names:
- kube-system