Refer to these instructions to install Prometheus on a TKG cluster provisioned with TKr for vSphere 7.x.

Prerequisites

See Workflow for Installing Standard Packages on TKr for vSphere 7.x.

Install Prometheus

Install Prometheus with Alertmanager.
  1. List the available Prometheus package versions in the repository.
    kubectl get packages -n tkg-system | grep prometheus
  2. Create the Prometheus namespace.
    kubectl create ns tanzu-system-monitoring
  3. Configure PSA on the Prometheus namespace.
    kubectl label ns tanzu-system-monitoring pod-security.kubernetes.io/enforce=privileged
    kubectl get ns tanzu-system-monitoring -oyaml|grep privileged
  4. Create the prometheus-data-values.yaml file.

    See prometheus-data-values.yaml.

  5. Create a secret using prometheus-data-values.yaml as input.
    Note: Because prometheus-data-values.yaml is large, it is less error-prone to create the secret separately than to include its contents in the Prometheus YAML specification.
    kubectl create secret generic prometheus-data-values --from-file=values.yaml=prometheus-data-values.yaml -n tkg-system
    secret/prometheus-data-values created
  6. Verify the secret.
    kubectl get secrets -A
    kubectl describe secret prometheus-data-values -n tkg-system
  7. If necessary, customize prometheus-data-values.yaml for your environment.

    See Prometheus Configuration.

    If you update prometheus-data-values.yaml, replace the secret using this command.
    kubectl create secret generic prometheus-data-values --from-file=values.yaml=prometheus-data-values.yaml -n tkg-system -o yaml --dry-run=client | kubectl replace -f-
    secret/prometheus-data-values replaced
  8. Create the prometheus.yaml specification.

    See Install Prometheus on TKr for vSphere 7.x.

  9. Install Prometheus.
    kubectl apply -f prometheus.yaml
    serviceaccount/prometheus-sa created
    clusterrolebinding.rbac.authorization.k8s.io/prometheus-role-binding created
    packageinstall.packaging.carvel.dev/prometheus created
  10. Verify the installation of the Prometheus package.
    kubectl get pkgi -A
  11. Verify Prometheus objects.
    kubectl get all -n tanzu-system-monitoring
    NAME                                                 READY   STATUS    RESTARTS   AGE
    pod/alertmanager-757ffd8c6c-97kqd                    1/1     Running   0          87s
    pod/prometheus-kube-state-metrics-67b965c5d8-8mf4k   1/1     Running   0          87s
    pod/prometheus-node-exporter-4spk9                   1/1     Running   0          87s
    pod/prometheus-node-exporter-6k2rh                   1/1     Running   0          87s
    pod/prometheus-node-exporter-7z9s8                   1/1     Running   0          87s
    pod/prometheus-node-exporter-9d6ss                   1/1     Running   0          87s
    pod/prometheus-node-exporter-csbwc                   1/1     Running   0          87s
    pod/prometheus-node-exporter-qdb72                   1/1     Running   0          87s
    pod/prometheus-pushgateway-dff459565-wfrz5           1/1     Running   0          86s
    pod/prometheus-server-56c68567f-bjcn5                2/2     Running   0          87s
    
    NAME                                    TYPE        CLUSTER-IP       EXTERNAL-IP   PORT(S)         AGE
    service/alertmanager                    ClusterIP   10.109.54.17     <none>        80/TCP          88s
    service/prometheus-kube-state-metrics   ClusterIP   None             <none>        80/TCP,81/TCP   88s
    service/prometheus-node-exporter        ClusterIP   10.104.132.133   <none>        9100/TCP        88s
    service/prometheus-pushgateway          ClusterIP   10.109.80.171    <none>        9091/TCP        88s
    service/prometheus-server               ClusterIP   10.103.252.220   <none>        80/TCP          87s
    
    NAME                                      DESIRED   CURRENT   READY   UP-TO-DATE   AVAILABLE   NODE SELECTOR   AGE
    daemonset.apps/prometheus-node-exporter   6         6         6       6            6           <none>          88s
    
    NAME                                            READY   UP-TO-DATE   AVAILABLE   AGE
    deployment.apps/alertmanager                    1/1     1            1           88s
    deployment.apps/prometheus-kube-state-metrics   1/1     1            1           88s
    deployment.apps/prometheus-pushgateway          1/1     1            1           87s
    deployment.apps/prometheus-server               1/1     1            1           88s
    
    NAME                                                       DESIRED   CURRENT   READY   AGE
    replicaset.apps/alertmanager-757ffd8c6c                    1         1         1       88s
    replicaset.apps/prometheus-kube-state-metrics-67b965c5d8   1         1         1       88s
    replicaset.apps/prometheus-pushgateway-dff459565           1         1         1       87s
    replicaset.apps/prometheus-server-56c68567f                1         1         1       88s
  12. Verify the Prometheus PVC.
    kubectl get pvc -n tanzu-system-monitoring
    NAME                STATUS   VOLUME                                     CAPACITY   ACCESS MODES   STORAGECLASS             AGE
    alertmanager        Bound    pvc-5781956b-abc4-4646-b54c-a3eda1bf140c   2Gi        RWO            vsphere-default-policy   53m
    prometheus-server   Bound    pvc-9d45d7cb-6754-40a6-a4b6-f47cf6c949a9   20Gi       RWO            vsphere-default-policy   53m

Access the Prometheus Dashboard

Once Prometheus is installed, complete the following steps to access the Prometheus dashboard.
  1. Ensure that the ingress section of the prometheus-data-values.yaml file is populated with all required fields.
    ingress:
      enabled: true
      virtual_host_fqdn: "prometheus.system.tanzu"
      prometheus_prefix: "/"
      alertmanager_prefix: "/alertmanager/"
      prometheusServicePort: 80
      alertmanagerServicePort: 80
      #! [Optional] The certificate for the ingress if you want to use your own TLS certificate.
      #! If it is left empty, the certificate is issued by cert-manager.
      tlsCertificate:
        #! [Required] the certificate
        tls.crt:
        #! [Required] the private key
        tls.key:
        #! [Optional] the CA certificate
        ca.crt:
  2. Get the public (external) IP address of the Envoy load balancer service that is deployed with Contour.
    kubectl -n tanzu-system-ingress get all
  3. Get the Prometheus FQDN from the HTTPProxy object.
    kubectl get httpproxy -n tanzu-system-monitoring

    The FQDN should be available at the public IP address for the Envoy service.

    NAME                   FQDN                      TLS SECRET       STATUS   STATUS DESCRIPTION
    prometheus-httpproxy   prometheus.system.tanzu   prometheus-tls   valid    Valid  HTTPProxy
  4. Create a DNS record that maps the Prometheus FQDN to the external IP address of the Envoy load balancer.
  5. Access the Prometheus dashboard by navigating to the Prometheus FQDN using a browser.

prometheus-data-values.yaml

#! Alertmanager settings: configuration document, Deployment, persistent
#! volume claim, and Service.
alertmanager:
  config:
    #! Alertmanager configuration (global, receivers, templates, route),
    #! embedded as a string. Written as a literal block so it reads exactly
    #! as the string value; the value ends with a single newline.
    alertmanager_yml: |
      global: {}
      receivers:
      - name: default-receiver
      templates:
      - '/etc/alertmanager/templates/*.tmpl'
      route:
        group_interval: 5m
        group_wait: 10s
        receiver: default-receiver
        repeat_interval: 3h
  deployment:
    replicas: 1
    updateStrategy: Recreate
    #! Left null here; the strategy above is Recreate.
    rollingUpdate:
      maxSurge: null
      maxUnavailable: null
    containers:
      resources: {}
    podAnnotations: {}
    podLabels: {}
  pvc:
    #! NOTE(review): environment-specific. The sample `kubectl get pvc` output
    #! in this topic shows vsphere-default-policy; use a storage class that
    #! exists in your cluster.
    storageClassName: wcpglobalstorageprofile
    storage: 2Gi
    accessMode: ReadWriteOnce
    annotations: {}
  service:
    type: ClusterIP
    #! The alerting section of prometheus_yml targets
    #! alertmanager.tanzu-system-monitoring.svc:80; keep this port in step with it.
    port: 80
    targetPort: 9093
    labels: {}
    annotations: {}
#! Ingress settings for reaching Prometheus and Alertmanager from outside the
#! cluster. Key order and quoting follow the ingress example shown under
#! "Access the Prometheus Dashboard".
ingress:
  #! Off by default; the dashboard procedure in this topic expects true.
  enabled: false
  virtual_host_fqdn: "prometheus.system.tanzu"
  prometheus_prefix: "/"
  alertmanager_prefix: "/alertmanager/"
  prometheusServicePort: 80
  alertmanagerServicePort: 80
  #! [Optional] Supply your own TLS certificate for the ingress.
  #! All three fields are null in this default.
  tlsCertificate:
    tls.crt: null
    tls.key: null
    ca.crt: null
#! kube-state-metrics Deployment and Service settings.
kube_state_metrics:
  deployment:
    replicas: 1
    containers:
      resources: {}
    podAnnotations: {}
    podLabels: {}
  service:
    type: ClusterIP
    #! Service port 80 and telemetry port 81 match the 80/TCP,81/TCP shown for
    #! service/prometheus-kube-state-metrics in the sample output.
    port: 80
    #! The kube-state-metrics scrape job in prometheus_yml addresses this
    #! service on :8080; keep the two in step.
    targetPort: 8080
    telemetryPort: 81
    telemetryTargetPort: 8081
    labels: {}
    annotations: {}
#! Namespace for the Prometheus package components. The verification commands
#! in this topic query this namespace, and the service DNS names inside
#! prometheus_yml embed it, so change them together.
namespace: tanzu-system-monitoring
#! node-exporter DaemonSet and Service settings.
node_exporter:
  daemonset:
    #! NOTE(review): this key is all lowercase, unlike the camelCase
    #! alertmanager.deployment.updateStrategy. Reproduced as-is; confirm
    #! against the package schema before renaming.
    updatestrategy: RollingUpdate
    hostNetwork: false
    containers:
      resources: {}
    podAnnotations: {}
    podLabels: {}
  service:
    type: ClusterIP
    #! The node-exporter scrape job in prometheus_yml addresses this service
    #! on :9100; keep the two in step.
    port: 9100
    targetPort: 9100
    labels: {}
    annotations: {}
prometheus:
  config:
    alerting_rules_yml: '{}
 
      '
    alerts_yml: '{}
 
      '
    prometheus_yml: "global:\n  evaluation_interval: 1m\n  scrape_interval: 1m\n \
      \ scrape_timeout: 10s\nrule_files:\n- /etc/config/alerting_rules.yml\n- /etc/config/recording_rules.yml\n\
      - /etc/config/alerts\n- /etc/config/rules\nscrape_configs:\n- job_name: 'prometheus'\n\
      \  scrape_interval: 5s\n  static_configs:\n  - targets: ['localhost:9090']\n\
      - job_name: 'kube-state-metrics'\n  static_configs:\n  - targets: ['prometheus-kube-state-metrics.tanzu-system-monitoring.svc.cluster.local:8080']\n\
      \n- job_name: 'node-exporter'\n  static_configs:\n  - targets: ['prometheus-node-exporter.tanzu-system-monitoring.svc.cluster.local:9100']\n\
      \n- job_name: 'kubernetes-pods'\n  kubernetes_sd_configs:\n  - role: pod\n \
      \ relabel_configs:\n  - source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_scrape]\n\
      \    action: keep\n    regex: true\n  - source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_path]\n\
      \    action: replace\n    target_label: __metrics_path__\n    regex: (.+)\n\
      \  - source_labels: [__address__, __meta_kubernetes_pod_annotation_prometheus_io_port]\n\
      \    action: replace\n    regex: ([^:]+)(?::\\d+)?;(\\d+)\n    replacement:\
      \ $1:$2\n    target_label: __address__\n  - action: labelmap\n    regex: __meta_kubernetes_pod_label_(.+)\n\
      \  - source_labels: [__meta_kubernetes_namespace]\n    action: replace\n   \
      \ target_label: kubernetes_namespace\n  - source_labels: [__meta_kubernetes_pod_name]\n\
      \    action: replace\n    target_label: kubernetes_pod_name\n- job_name: kubernetes-nodes-cadvisor\n\
      \  kubernetes_sd_configs:\n  - role: node\n  relabel_configs:\n  - action: labelmap\n\
      \    regex: __meta_kubernetes_node_label_(.+)\n  - replacement: kubernetes.default.svc:443\n\
      \    target_label: __address__\n  - regex: (.+)\n    replacement: /api/v1/nodes/$1/proxy/metrics/cadvisor\n\
      \    source_labels:\n    - __meta_kubernetes_node_name\n    target_label: __metrics_path__\n\
      \  scheme: https\n  tls_config:\n    ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt\n\
      \    insecure_skip_verify: true\n  bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token\n\
      - job_name: kubernetes-apiservers\n  kubernetes_sd_configs:\n  - role: endpoints\n\
      \  relabel_configs:\n  - action: keep\n    regex: default;kubernetes;https\n\
      \    source_labels:\n    - __meta_kubernetes_namespace\n    - __meta_kubernetes_service_name\n\
      \    - __meta_kubernetes_endpoint_port_name\n  scheme: https\n  tls_config:\n\
      \    ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt\n    insecure_skip_verify:\
      \ true\n  bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token\n\
      alerting:\n  alertmanagers:\n  - scheme: http\n    static_configs:\n    - targets:\n\
      \      - alertmanager.tanzu-system-monitoring.svc:80\n  - kubernetes_sd_configs:\n\
      \      - role: pod\n    relabel_configs:\n    - source_labels: [__meta_kubernetes_namespace]\n\
      \      regex: default\n      action: keep\n    - source_labels: [__meta_kubernetes_pod_label_app]\n\
      \      regex: prometheus\n      action: keep\n    - source_labels: [__meta_kubernetes_pod_label_component]\n\
      \      regex: alertmanager\n      action: keep\n    - source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_probe]\n\
      \      regex: .*\n      action: keep\n    - source_labels: [__meta_kubernetes_pod_container_port_number]\n\
      \      regex:\n      action: drop\n"
    recording_rules_yml: "groups:\n  - name: kube-apiserver.rules\n    interval: 3m\n\
      \    rules:\n    - expr: |2\n        (\n          (\n            sum(rate(apiserver_request_duration_seconds_count{job=\"\
      kubernetes-apiservers\",verb=~\"LIST|GET\"}[1d]))\n            -\n         \
      \   (\n              (\n                sum(rate(apiserver_request_duration_seconds_bucket{job=\"\
      kubernetes-apiservers\",verb=~\"LIST|GET\",scope=~\"resource|\",le=\"0.1\"}[1d]))\n\
      \                or\n                vector(0)\n              )\n          \
      \    +\n              sum(rate(apiserver_request_duration_seconds_bucket{job=\"\
      kubernetes-apiservers\",verb=~\"LIST|GET\",scope=\"namespace\",le=\"0.5\"}[1d]))\n\
      \              +\n              sum(rate(apiserver_request_duration_seconds_bucket{job=\"\
      kubernetes-apiservers\",verb=~\"LIST|GET\",scope=\"cluster\",le=\"5\"}[1d]))\n\
      \            )\n          )\n          +\n          # errors\n          sum(rate(apiserver_request_total{job=\"\
      kubernetes-apiservers\",verb=~\"LIST|GET\",code=~\"5..\"}[1d]))\n        )\n\
      \        /\n        sum(rate(apiserver_request_total{job=\"kubernetes-apiservers\"\
      ,verb=~\"LIST|GET\"}[1d]))\n      labels:\n        verb: read\n      record:\
      \ apiserver_request:burnrate1d\n    - expr: |2\n        (\n          (\n   \
      \         # too slow\n            sum(rate(apiserver_request_duration_seconds_count{job=\"\
      kubernetes-apiservers\",verb=~\"LIST|GET\"}[1h]))\n            -\n         \
      \   (\n              (\n                sum(rate(apiserver_request_duration_seconds_bucket{job=\"\
      kubernetes-apiservers\",verb=~\"LIST|GET\",scope=~\"resource|\",le=\"0.1\"}[1h]))\n\
      \                or\n                vector(0)\n              )\n          \
      \    +\n              sum(rate(apiserver_request_duration_seconds_bucket{job=\"\
      kubernetes-apiservers\",verb=~\"LIST|GET\",scope=\"namespace\",le=\"0.5\"}[1h]))\n\
      \              +\n              sum(rate(apiserver_request_duration_seconds_bucket{job=\"\
      kubernetes-apiservers\",verb=~\"LIST|GET\",scope=\"cluster\",le=\"5\"}[1h]))\n\
      \            )\n          )\n          +\n          # errors\n          sum(rate(apiserver_request_total{job=\"\
      kubernetes-apiservers\",verb=~\"LIST|GET\",code=~\"5..\"}[1h]))\n        )\n\
      \        /\n        sum(rate(apiserver_request_total{job=\"kubernetes-apiservers\"\
      ,verb=~\"LIST|GET\"}[1h]))\n      labels:\n        verb: read\n      record:\
      \ apiserver_request:burnrate1h\n    - expr: |2\n        (\n          (\n   \
      \         # too slow\n            sum(rate(apiserver_request_duration_seconds_count{job=\"\
      kubernetes-apiservers\",verb=~\"LIST|GET\"}[2h]))\n            -\n         \
      \   (\n              (\n                sum(rate(apiserver_request_duration_seconds_bucket{job=\"\
      kubernetes-apiservers\",verb=~\"LIST|GET\",scope=~\"resource|\",le=\"0.1\"}[2h]))\n\
      \                or\n                vector(0)\n              )\n          \
      \    +\n              sum(rate(apiserver_request_duration_seconds_bucket{job=\"\
      kubernetes-apiservers\",verb=~\"LIST|GET\",scope=\"namespace\",le=\"0.5\"}[2h]))\n\
      \              +\n              sum(rate(apiserver_request_duration_seconds_bucket{job=\"\
      kubernetes-apiservers\",verb=~\"LIST|GET\",scope=\"cluster\",le=\"5\"}[2h]))\n\
      \            )\n          )\n          +\n          # errors\n          sum(rate(apiserver_request_total{job=\"\
      kubernetes-apiservers\",verb=~\"LIST|GET\",code=~\"5..\"}[2h]))\n        )\n\
      \        /\n        sum(rate(apiserver_request_total{job=\"kubernetes-apiservers\"\
      ,verb=~\"LIST|GET\"}[2h]))\n      labels:\n        verb: read\n      record:\
      \ apiserver_request:burnrate2h\n    - expr: |2\n        (\n          (\n   \
      \         # too slow\n            sum(rate(apiserver_request_duration_seconds_count{job=\"\
      kubernetes-apiservers\",verb=~\"LIST|GET\"}[30m]))\n            -\n        \
      \    (\n              (\n                sum(rate(apiserver_request_duration_seconds_bucket{job=\"\
      kubernetes-apiservers\",verb=~\"LIST|GET\",scope=~\"resource|\",le=\"0.1\"}[30m]))\n\
      \                or\n                vector(0)\n              )\n          \
      \    +\n              sum(rate(apiserver_request_duration_seconds_bucket{job=\"\
      kubernetes-apiservers\",verb=~\"LIST|GET\",scope=\"namespace\",le=\"0.5\"}[30m]))\n\
      \              +\n              sum(rate(apiserver_request_duration_seconds_bucket{job=\"\
      kubernetes-apiservers\",verb=~\"LIST|GET\",scope=\"cluster\",le=\"5\"}[30m]))\n\
      \            )\n          )\n          +\n          # errors\n          sum(rate(apiserver_request_total{job=\"\
      kubernetes-apiservers\",verb=~\"LIST|GET\",code=~\"5..\"}[30m]))\n        )\n\
      \        /\n        sum(rate(apiserver_request_total{job=\"kubernetes-apiservers\"\
      ,verb=~\"LIST|GET\"}[30m]))\n      labels:\n        verb: read\n      record:\
      \ apiserver_request:burnrate30m\n    - expr: |2\n        (\n          (\n  \
      \          # too slow\n            sum(rate(apiserver_request_duration_seconds_count{job=\"\
      kubernetes-apiservers\",verb=~\"LIST|GET\"}[3d]))\n            -\n         \
      \   (\n              (\n                sum(rate(apiserver_request_duration_seconds_bucket{job=\"\
      kubernetes-apiservers\",verb=~\"LIST|GET\",scope=~\"resource|\",le=\"0.1\"}[3d]))\n\
      \                or\n                vector(0)\n              )\n          \
      \    +\n              sum(rate(apiserver_request_duration_seconds_bucket{job=\"\
      kubernetes-apiservers\",verb=~\"LIST|GET\",scope=\"namespace\",le=\"0.5\"}[3d]))\n\
      \              +\n              sum(rate(apiserver_request_duration_seconds_bucket{job=\"\
      kubernetes-apiservers\",verb=~\"LIST|GET\",scope=\"cluster\",le=\"5\"}[3d]))\n\
      \            )\n          )\n          +\n          # errors\n          sum(rate(apiserver_request_total{job=\"\
      kubernetes-apiservers\",verb=~\"LIST|GET\",code=~\"5..\"}[3d]))\n        )\n\
      \        /\n        sum(rate(apiserver_request_total{job=\"kubernetes-apiservers\"\
      ,verb=~\"LIST|GET\"}[3d]))\n      labels:\n        verb: read\n      record:\
      \ apiserver_request:burnrate3d\n    - expr: |2\n        (\n          (\n   \
      \         # too slow\n            sum(rate(apiserver_request_duration_seconds_count{job=\"\
      kubernetes-apiservers\",verb=~\"LIST|GET\"}[5m]))\n            -\n         \
      \   (\n              (\n                sum(rate(apiserver_request_duration_seconds_bucket{job=\"\
      kubernetes-apiservers\",verb=~\"LIST|GET\",scope=~\"resource|\",le=\"0.1\"}[5m]))\n\
      \                or\n                vector(0)\n              )\n          \
      \    +\n              sum(rate(apiserver_request_duration_seconds_bucket{job=\"\
      kubernetes-apiservers\",verb=~\"LIST|GET\",scope=\"namespace\",le=\"0.5\"}[5m]))\n\
      \              +\n              sum(rate(apiserver_request_duration_seconds_bucket{job=\"\
      kubernetes-apiservers\",verb=~\"LIST|GET\",scope=\"cluster\",le=\"5\"}[5m]))\n\
      \            )\n          )\n          +\n          # errors\n          sum(rate(apiserver_request_total{job=\"\
      kubernetes-apiservers\",verb=~\"LIST|GET\",code=~\"5..\"}[5m]))\n        )\n\
      \        /\n        sum(rate(apiserver_request_total{job=\"kubernetes-apiservers\"\
      ,verb=~\"LIST|GET\"}[5m]))\n      labels:\n        verb: read\n      record:\
      \ apiserver_request:burnrate5m\n    - expr: |2\n        (\n          (\n   \
      \         # too slow\n            sum(rate(apiserver_request_duration_seconds_count{job=\"\
      kubernetes-apiservers\",verb=~\"LIST|GET\"}[6h]))\n            -\n         \
      \   (\n              (\n                sum(rate(apiserver_request_duration_seconds_bucket{job=\"\
      kubernetes-apiservers\",verb=~\"LIST|GET\",scope=~\"resource|\",le=\"0.1\"}[6h]))\n\
      \                or\n                vector(0)\n              )\n          \
      \    +\n              sum(rate(apiserver_request_duration_seconds_bucket{job=\"\
      kubernetes-apiservers\",verb=~\"LIST|GET\",scope=\"namespace\",le=\"0.5\"}[6h]))\n\
      \              +\n              sum(rate(apiserver_request_duration_seconds_bucket{job=\"\
      kubernetes-apiservers\",verb=~\"LIST|GET\",scope=\"cluster\",le=\"5\"}[6h]))\n\
      \            )\n          )\n          +\n          # errors\n          sum(rate(apiserver_request_total{job=\"\
      kubernetes-apiservers\",verb=~\"LIST|GET\",code=~\"5..\"}[6h]))\n        )\n\
      \        /\n        sum(rate(apiserver_request_total{job=\"kubernetes-apiservers\"\
      ,verb=~\"LIST|GET\"}[6h]))\n      labels:\n        verb: read\n      record:\
      \ apiserver_request:burnrate6h\n    - expr: |2\n        (\n          (\n   \
      \         # too slow\n            sum(rate(apiserver_request_duration_seconds_count{job=\"\
      kubernetes-apiservers\",verb=~\"POST|PUT|PATCH|DELETE\"}[1d]))\n           \
      \ -\n            sum(rate(apiserver_request_duration_seconds_bucket{job=\"kubernetes-apiservers\"\
      ,verb=~\"POST|PUT|PATCH|DELETE\",le=\"1\"}[1d]))\n          )\n          +\n\
      \          sum(rate(apiserver_request_total{job=\"kubernetes-apiservers\",verb=~\"\
      POST|PUT|PATCH|DELETE\",code=~\"5..\"}[1d]))\n        )\n        /\n       \
      \ sum(rate(apiserver_request_total{job=\"kubernetes-apiservers\",verb=~\"POST|PUT|PATCH|DELETE\"\
      }[1d]))\n      labels:\n        verb: write\n      record: apiserver_request:burnrate1d\n\
      \    - expr: |2\n        (\n          (\n            # too slow\n          \
      \  sum(rate(apiserver_request_duration_seconds_count{job=\"kubernetes-apiservers\"\
      ,verb=~\"POST|PUT|PATCH|DELETE\"}[1h]))\n            -\n            sum(rate(apiserver_request_duration_seconds_bucket{job=\"\
      kubernetes-apiservers\",verb=~\"POST|PUT|PATCH|DELETE\",le=\"1\"}[1h]))\n  \
      \        )\n          +\n          sum(rate(apiserver_request_total{job=\"kubernetes-apiservers\"\
      ,verb=~\"POST|PUT|PATCH|DELETE\",code=~\"5..\"}[1h]))\n        )\n        /\n\
      \        sum(rate(apiserver_request_total{job=\"kubernetes-apiservers\",verb=~\"\
      POST|PUT|PATCH|DELETE\"}[1h]))\n      labels:\n        verb: write\n      record:\
      \ apiserver_request:burnrate1h\n    - expr: |2\n        (\n          (\n   \
      \         # too slow\n            sum(rate(apiserver_request_duration_seconds_count{job=\"\
      kubernetes-apiservers\",verb=~\"POST|PUT|PATCH|DELETE\"}[2h]))\n           \
      \ -\n            sum(rate(apiserver_request_duration_seconds_bucket{job=\"kubernetes-apiservers\"\
      ,verb=~\"POST|PUT|PATCH|DELETE\",le=\"1\"}[2h]))\n          )\n          +\n\
      \          sum(rate(apiserver_request_total{job=\"kubernetes-apiservers\",verb=~\"\
      POST|PUT|PATCH|DELETE\",code=~\"5..\"}[2h]))\n        )\n        /\n       \
      \ sum(rate(apiserver_request_total{job=\"kubernetes-apiservers\",verb=~\"POST|PUT|PATCH|DELETE\"\
      }[2h]))\n      labels:\n        verb: write\n      record: apiserver_request:burnrate2h\n\
      \    - expr: |2\n        (\n          (\n            # too slow\n          \
      \  sum(rate(apiserver_request_duration_seconds_count{job=\"kubernetes-apiservers\"\
      ,verb=~\"POST|PUT|PATCH|DELETE\"}[30m]))\n            -\n            sum(rate(apiserver_request_duration_seconds_bucket{job=\"\
      kubernetes-apiservers\",verb=~\"POST|PUT|PATCH|DELETE\",le=\"1\"}[30m]))\n \
      \         )\n          +\n          sum(rate(apiserver_request_total{job=\"\
      kubernetes-apiservers\",verb=~\"POST|PUT|PATCH|DELETE\",code=~\"5..\"}[30m]))\n\
      \        )\n        /\n        sum(rate(apiserver_request_total{job=\"kubernetes-apiservers\"\
      ,verb=~\"POST|PUT|PATCH|DELETE\"}[30m]))\n      labels:\n        verb: write\n\
      \      record: apiserver_request:burnrate30m\n    - expr: |2\n        (\n  \
      \        (\n            # too slow\n            sum(rate(apiserver_request_duration_seconds_count{job=\"\
      kubernetes-apiservers\",verb=~\"POST|PUT|PATCH|DELETE\"}[3d]))\n           \
      \ -\n            sum(rate(apiserver_request_duration_seconds_bucket{job=\"kubernetes-apiservers\"\
      ,verb=~\"POST|PUT|PATCH|DELETE\",le=\"1\"}[3d]))\n          )\n          +\n\
      \          sum(rate(apiserver_request_total{job=\"kubernetes-apiservers\",verb=~\"\
      POST|PUT|PATCH|DELETE\",code=~\"5..\"}[3d]))\n        )\n        /\n       \
      \ sum(rate(apiserver_request_total{job=\"kubernetes-apiservers\",verb=~\"POST|PUT|PATCH|DELETE\"\
      }[3d]))\n      labels:\n        verb: write\n      record: apiserver_request:burnrate3d\n\
      \    - expr: |2\n        (\n          (\n            # too slow\n          \
      \  sum(rate(apiserver_request_duration_seconds_count{job=\"kubernetes-apiservers\"\
      ,verb=~\"POST|PUT|PATCH|DELETE\"}[5m]))\n            -\n            sum(rate(apiserver_request_duration_seconds_bucket{job=\"\
      kubernetes-apiservers\",verb=~\"POST|PUT|PATCH|DELETE\",le=\"1\"}[5m]))\n  \
      \        )\n          +\n          sum(rate(apiserver_request_total{job=\"kubernetes-apiservers\"\
      ,verb=~\"POST|PUT|PATCH|DELETE\",code=~\"5..\"}[5m]))\n        )\n        /\n\
      \        sum(rate(apiserver_request_total{job=\"kubernetes-apiservers\",verb=~\"\
      POST|PUT|PATCH|DELETE\"}[5m]))\n      labels:\n        verb: write\n      record:\
      \ apiserver_request:burnrate5m\n    - expr: |2\n        (\n          (\n   \
      \         # too slow\n            sum(rate(apiserver_request_duration_seconds_count{job=\"\
      kubernetes-apiservers\",verb=~\"POST|PUT|PATCH|DELETE\"}[6h]))\n           \
      \ -\n            sum(rate(apiserver_request_duration_seconds_bucket{job=\"kubernetes-apiservers\"\
      ,verb=~\"POST|PUT|PATCH|DELETE\",le=\"1\"}[6h]))\n          )\n          +\n\
      \          sum(rate(apiserver_request_total{job=\"kubernetes-apiservers\",verb=~\"\
      POST|PUT|PATCH|DELETE\",code=~\"5..\"}[6h]))\n        )\n        /\n       \
      \ sum(rate(apiserver_request_total{job=\"kubernetes-apiservers\",verb=~\"POST|PUT|PATCH|DELETE\"\
      }[6h]))\n      labels:\n        verb: write\n      record: apiserver_request:burnrate6h\n\
      \    - expr: |\n        sum by (code,resource) (rate(apiserver_request_total{job=\"\
      kubernetes-apiservers\",verb=~\"LIST|GET\"}[5m]))\n      labels:\n        verb:\
      \ read\n      record: code_resource:apiserver_request_total:rate5m\n    - expr:\
      \ |\n        sum by (code,resource) (rate(apiserver_request_total{job=\"kubernetes-apiservers\"\
      ,verb=~\"POST|PUT|PATCH|DELETE\"}[5m]))\n      labels:\n        verb: write\n\
      \      record: code_resource:apiserver_request_total:rate5m\n    - expr: |\n\
      \        histogram_quantile(0.99, sum by (le, resource) (rate(apiserver_request_duration_seconds_bucket{job=\"\
      kubernetes-apiservers\",verb=~\"LIST|GET\"}[5m]))) > 0\n      labels:\n    \
      \    quantile: \"0.99\"\n        verb: read\n      record: cluster_quantile:apiserver_request_duration_seconds:histogram_quantile\n\
      \    - expr: |\n        histogram_quantile(0.99, sum by (le, resource) (rate(apiserver_request_duration_seconds_bucket{job=\"\
      kubernetes-apiservers\",verb=~\"POST|PUT|PATCH|DELETE\"}[5m]))) > 0\n      labels:\n\
      \        quantile: \"0.99\"\n        verb: write\n      record: cluster_quantile:apiserver_request_duration_seconds:histogram_quantile\n\
      \    - expr: |2\n        sum(rate(apiserver_request_duration_seconds_sum{subresource!=\"\
      log\",verb!~\"LIST|WATCH|WATCHLIST|DELETECOLLECTION|PROXY|CONNECT\"}[5m])) without(instance,\
      \ pod)\n        /\n        sum(rate(apiserver_request_duration_seconds_count{subresource!=\"\
      log\",verb!~\"LIST|WATCH|WATCHLIST|DELETECOLLECTION|PROXY|CONNECT\"}[5m])) without(instance,\
      \ pod)\n      record: cluster:apiserver_request_duration_seconds:mean5m\n  \
      \  - expr: |\n        histogram_quantile(0.99, sum(rate(apiserver_request_duration_seconds_bucket{job=\"\
      kubernetes-apiservers\",subresource!=\"log\",verb!~\"LIST|WATCH|WATCHLIST|DELETECOLLECTION|PROXY|CONNECT\"\
      }[5m])) without(instance, pod))\n      labels:\n        quantile: \"0.99\"\n\
      \      record: cluster_quantile:apiserver_request_duration_seconds:histogram_quantile\n\
      \    - expr: |\n        histogram_quantile(0.9, sum(rate(apiserver_request_duration_seconds_bucket{job=\"\
      kubernetes-apiservers\",subresource!=\"log\",verb!~\"LIST|WATCH|WATCHLIST|DELETECOLLECTION|PROXY|CONNECT\"\
      }[5m])) without(instance, pod))\n      labels:\n        quantile: \"0.9\"\n\
      \      record: cluster_quantile:apiserver_request_duration_seconds:histogram_quantile\n\
      \    - expr: |\n        histogram_quantile(0.5, sum(rate(apiserver_request_duration_seconds_bucket{job=\"\
      kubernetes-apiservers\",subresource!=\"log\",verb!~\"LIST|WATCH|WATCHLIST|DELETECOLLECTION|PROXY|CONNECT\"\
      }[5m])) without(instance, pod))\n      labels:\n        quantile: \"0.5\"\n\
      \      record: cluster_quantile:apiserver_request_duration_seconds:histogram_quantile\n\
      \  - interval: 3m\n    name: kube-apiserver-availability.rules\n    rules:\n\
      \    - expr: |2\n        1 - (\n          (\n            # write too slow\n\
      \            sum(increase(apiserver_request_duration_seconds_count{verb=~\"\
      POST|PUT|PATCH|DELETE\"}[30d]))\n            -\n            sum(increase(apiserver_request_duration_seconds_bucket{verb=~\"\
      POST|PUT|PATCH|DELETE\",le=\"1\"}[30d]))\n          ) +\n          (\n     \
      \       # read too slow\n            sum(increase(apiserver_request_duration_seconds_count{verb=~\"\
      LIST|GET\"}[30d]))\n            -\n            (\n              (\n        \
      \        sum(increase(apiserver_request_duration_seconds_bucket{verb=~\"LIST|GET\"\
      ,scope=~\"resource|\",le=\"0.1\"}[30d]))\n                or\n             \
      \   vector(0)\n              )\n              +\n              sum(increase(apiserver_request_duration_seconds_bucket{verb=~\"\
      LIST|GET\",scope=\"namespace\",le=\"0.5\"}[30d]))\n              +\n       \
      \       sum(increase(apiserver_request_duration_seconds_bucket{verb=~\"LIST|GET\"\
      ,scope=\"cluster\",le=\"5\"}[30d]))\n            )\n          ) +\n        \
      \  # errors\n          sum(code:apiserver_request_total:increase30d{code=~\"\
      5..\"} or vector(0))\n        )\n        /\n        sum(code:apiserver_request_total:increase30d)\n\
      \      labels:\n        verb: all\n      record: apiserver_request:availability30d\n\
      \    - expr: |2\n        1 - (\n          sum(increase(apiserver_request_duration_seconds_count{job=\"\
      kubernetes-apiservers\",verb=~\"LIST|GET\"}[30d]))\n          -\n          (\n\
      \            # too slow\n            (\n              sum(increase(apiserver_request_duration_seconds_bucket{job=\"\
      kubernetes-apiservers\",verb=~\"LIST|GET\",scope=~\"resource|\",le=\"0.1\"}[30d]))\n\
      \              or\n              vector(0)\n            )\n            +\n \
      \           sum(increase(apiserver_request_duration_seconds_bucket{job=\"kubernetes-apiservers\"\
      ,verb=~\"LIST|GET\",scope=\"namespace\",le=\"0.5\"}[30d]))\n            +\n\
      \            sum(increase(apiserver_request_duration_seconds_bucket{job=\"kubernetes-apiservers\"\
      ,verb=~\"LIST|GET\",scope=\"cluster\",le=\"5\"}[30d]))\n          )\n      \
      \    +\n          # errors\n          sum(code:apiserver_request_total:increase30d{verb=\"\
      read\",code=~\"5..\"} or vector(0))\n        )\n        /\n        sum(code:apiserver_request_total:increase30d{verb=\"\
      read\"})\n      labels:\n        verb: read\n      record: apiserver_request:availability30d\n\
      \    - expr: |2\n        1 - (\n          (\n            # too slow\n      \
      \      sum(increase(apiserver_request_duration_seconds_count{verb=~\"POST|PUT|PATCH|DELETE\"\
      }[30d]))\n            -\n            sum(increase(apiserver_request_duration_seconds_bucket{verb=~\"\
      POST|PUT|PATCH|DELETE\",le=\"1\"}[30d]))\n          )\n          +\n       \
      \   # errors\n          sum(code:apiserver_request_total:increase30d{verb=\"\
      write\",code=~\"5..\"} or vector(0))\n        )\n        /\n        sum(code:apiserver_request_total:increase30d{verb=\"\
      write\"})\n      labels:\n        verb: write\n      record: apiserver_request:availability30d\n\
      \    - expr: |\n        sum by (code, verb) (increase(apiserver_request_total{job=\"\
      kubernetes-apiservers\",verb=\"LIST\",code=~\"2..\"}[30d]))\n      record: code_verb:apiserver_request_total:increase30d\n\
      \    - expr: |\n        sum by (code, verb) (increase(apiserver_request_total{job=\"\
      kubernetes-apiservers\",verb=\"GET\",code=~\"2..\"}[30d]))\n      record: code_verb:apiserver_request_total:increase30d\n\
      \    - expr: |\n        sum by (code, verb) (increase(apiserver_request_total{job=\"\
      kubernetes-apiservers\",verb=\"POST\",code=~\"2..\"}[30d]))\n      record: code_verb:apiserver_request_total:increase30d\n\
      \    - expr: |\n        sum by (code, verb) (increase(apiserver_request_total{job=\"\
      kubernetes-apiservers\",verb=\"PUT\",code=~\"2..\"}[30d]))\n      record: code_verb:apiserver_request_total:increase30d\n\
      \    - expr: |\n        sum by (code, verb) (increase(apiserver_request_total{job=\"\
      kubernetes-apiservers\",verb=\"PATCH\",code=~\"2..\"}[30d]))\n      record:\
      \ code_verb:apiserver_request_total:increase30d\n    - expr: |\n        sum\
      \ by (code, verb) (increase(apiserver_request_total{job=\"kubernetes-apiservers\"\
      ,verb=\"DELETE\",code=~\"2..\"}[30d]))\n      record: code_verb:apiserver_request_total:increase30d\n\
      \    - expr: |\n        sum by (code, verb) (increase(apiserver_request_total{job=\"\
      kubernetes-apiservers\",verb=\"LIST\",code=~\"3..\"}[30d]))\n      record: code_verb:apiserver_request_total:increase30d\n\
      \    - expr: |\n        sum by (code, verb) (increase(apiserver_request_total{job=\"\
      kubernetes-apiservers\",verb=\"GET\",code=~\"3..\"}[30d]))\n      record: code_verb:apiserver_request_total:increase30d\n\
      \    - expr: |\n        sum by (code, verb) (increase(apiserver_request_total{job=\"\
      kubernetes-apiservers\",verb=\"POST\",code=~\"3..\"}[30d]))\n      record: code_verb:apiserver_request_total:increase30d\n\
      \    - expr: |\n        sum by (code, verb) (increase(apiserver_request_total{job=\"\
      kubernetes-apiservers\",verb=\"PUT\",code=~\"3..\"}[30d]))\n      record: code_verb:apiserver_request_total:increase30d\n\
      \    - expr: |\n        sum by (code, verb) (increase(apiserver_request_total{job=\"\
      kubernetes-apiservers\",verb=\"PATCH\",code=~\"3..\"}[30d]))\n      record:\
      \ code_verb:apiserver_request_total:increase30d\n    - expr: |\n        sum\
      \ by (code, verb) (increase(apiserver_request_total{job=\"kubernetes-apiservers\"\
      ,verb=\"DELETE\",code=~\"3..\"}[30d]))\n      record: code_verb:apiserver_request_total:increase30d\n\
      \    - expr: |\n        sum by (code, verb) (increase(apiserver_request_total{job=\"\
      kubernetes-apiservers\",verb=\"LIST\",code=~\"4..\"}[30d]))\n      record: code_verb:apiserver_request_total:increase30d\n\
      \    - expr: |\n        sum by (code, verb) (increase(apiserver_request_total{job=\"\
      kubernetes-apiservers\",verb=\"GET\",code=~\"4..\"}[30d]))\n      record: code_verb:apiserver_request_total:increase30d\n\
      \    - expr: |\n        sum by (code, verb) (increase(apiserver_request_total{job=\"\
      kubernetes-apiservers\",verb=\"POST\",code=~\"4..\"}[30d]))\n      record: code_verb:apiserver_request_total:increase30d\n\
      \    - expr: |\n        sum by (code, verb) (increase(apiserver_request_total{job=\"\
      kubernetes-apiservers\",verb=\"PUT\",code=~\"4..\"}[30d]))\n      record: code_verb:apiserver_request_total:increase30d\n\
      \    - expr: |\n        sum by (code, verb) (increase(apiserver_request_total{job=\"\
      kubernetes-apiservers\",verb=\"PATCH\",code=~\"4..\"}[30d]))\n      record:\
      \ code_verb:apiserver_request_total:increase30d\n    - expr: |\n        sum\
      \ by (code, verb) (increase(apiserver_request_total{job=\"kubernetes-apiservers\"\
      ,verb=\"DELETE\",code=~\"4..\"}[30d]))\n      record: code_verb:apiserver_request_total:increase30d\n\
      \    - expr: |\n        sum by (code, verb) (increase(apiserver_request_total{job=\"\
      kubernetes-apiservers\",verb=\"LIST\",code=~\"5..\"}[30d]))\n      record: code_verb:apiserver_request_total:increase30d\n\
      \    - expr: |\n        sum by (code, verb) (increase(apiserver_request_total{job=\"\
      kubernetes-apiservers\",verb=\"GET\",code=~\"5..\"}[30d]))\n      record: code_verb:apiserver_request_total:increase30d\n\
      \    - expr: |\n        sum by (code, verb) (increase(apiserver_request_total{job=\"\
      kubernetes-apiservers\",verb=\"POST\",code=~\"5..\"}[30d]))\n      record: code_verb:apiserver_request_total:increase30d\n\
      \    - expr: |\n        sum by (code, verb) (increase(apiserver_request_total{job=\"\
      kubernetes-apiservers\",verb=\"PUT\",code=~\"5..\"}[30d]))\n      record: code_verb:apiserver_request_total:increase30d\n\
      \    - expr: |\n        sum by (code, verb) (increase(apiserver_request_total{job=\"\
      kubernetes-apiservers\",verb=\"PATCH\",code=~\"5..\"}[30d]))\n      record:\
      \ code_verb:apiserver_request_total:increase30d\n    - expr: |\n        sum\
      \ by (code, verb) (increase(apiserver_request_total{job=\"kubernetes-apiservers\"\
      ,verb=\"DELETE\",code=~\"5..\"}[30d]))\n      record: code_verb:apiserver_request_total:increase30d\n\
      \    - expr: |\n        sum by (code) (code_verb:apiserver_request_total:increase30d{verb=~\"\
      LIST|GET\"})\n      labels:\n        verb: read\n      record: code:apiserver_request_total:increase30d\n\
      \    - expr: |\n        sum by (code) (code_verb:apiserver_request_total:increase30d{verb=~\"\
      POST|PUT|PATCH|DELETE\"})\n      labels:\n        verb: write\n      record:\
      \ code:apiserver_request_total:increase30d\n"
    # Value is the two characters "{}" followed by one newline.
    rules_yml: "{}\n"
  # Deployment settings. The `containers.args` list holds the command-line
  # flags for the server container; `configmapReload` configures the
  # container that triggers a reload.
  deployment:
    configmapReload:
      containers:
        args:
        - --volume-dir=/etc/config
        # Reload endpoint on the local server; it is served only because
        # --web.enable-lifecycle is set in containers.args below.
        - --webhook-url=http://127.0.0.1:9090/-/reload
        resources: {}
    containers:
      args:
      - --storage.tsdb.retention.time=42d
      - --config.file=/etc/config/prometheus.yml
      - --storage.tsdb.path=/data
      # Console library directory, next to the console templates below.
      - --web.console.libraries=/etc/prometheus/console_libraries
      - --web.console.templates=/etc/prometheus/consoles
      - --web.enable-lifecycle
      resources: {}
    podAnnotations: {}
    podLabels: {}
    replicas: 1
    # rollingUpdate values are left null while updateStrategy is Recreate.
    rollingUpdate:
      maxSurge: null
      maxUnavailable: null
    updateStrategy: Recreate
  # Persistent volume claim: 150Gi, ReadWriteOnce, from the
  # wcpglobalstorageprofile storage class.
  pvc:
    storageClassName: wcpglobalstorageprofile
    storage: 150Gi
    accessMode: ReadWriteOnce
    annotations: {}
  # ClusterIP service: port 80 forwards to container port 9090.
  service:
    type: ClusterIP
    port: 80
    targetPort: 9090
    annotations: {}
    labels: {}
# Pushgateway settings: one replica behind a ClusterIP service on port 9091.
pushgateway:
  service:
    type: ClusterIP
    port: 9091
    targetPort: 9091
    annotations: {}
    labels: {}
  deployment:
    replicas: 1
    containers:
      resources: {}
    podAnnotations: {}
    podLabels: {}

prometheus.yaml

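The prometheus.yaml spec defines the prometheus-sa service account, its cluster role binding, and the Prometheus PackageInstall, which references the prometheus-data-values secret.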
# Service account named by the PackageInstall's serviceAccountName.
apiVersion: v1
kind: ServiceAccount
metadata:
  namespace: tkg-system
  name: prometheus-sa
 
---
# Marked temporary in the original spec: binds the prometheus-sa service
# account in tkg-system to the cluster-admin ClusterRole.
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
  name: prometheus-role-binding
subjects:
  - kind: ServiceAccount
    namespace: tkg-system
    name: prometheus-sa
roleRef:
  kind: ClusterRole
  name: cluster-admin
  apiGroup: rbac.authorization.k8s.io
 
---
# Installs the prometheus.tanzu.vmware.com package using the prometheus-sa
# service account, with values read from the prometheus-data-values secret.
apiVersion: packaging.carvel.dev/v1alpha1
kind: PackageInstall
metadata:
  namespace: tkg-system
  name: prometheus
spec:
  packageRef:
    refName: prometheus.tanzu.vmware.com
    versionSelection:
      # Quoted so the version constraint is always read as a string.
      constraints: '2.45.0+vmware.1-tkg.2'
  serviceAccountName: prometheus-sa
  values:
    - secretRef:
        name: prometheus-data-values