From 42706bf236e0c9493394d54bd73c6499b0b0754d Mon Sep 17 00:00:00 2001 From: Anmol1696 Date: Mon, 16 Oct 2023 17:28:29 +0530 Subject: [PATCH 01/15] initial addition of grafana and prometheus nodes --- .../devnet/templates/monitoring/grafana.yaml | 86 +++++++++++++++++ .../templates/monitoring/prometheus.yaml | 95 +++++++++++++++++++ charts/devnet/values.schema.json | 27 ++++++ charts/devnet/values.yaml | 11 +++ tests/e2e/configs/two-chain-monitoring.yaml | 50 ++++++++++ 5 files changed, 269 insertions(+) create mode 100644 charts/devnet/templates/monitoring/grafana.yaml create mode 100644 charts/devnet/templates/monitoring/prometheus.yaml create mode 100644 tests/e2e/configs/two-chain-monitoring.yaml diff --git a/charts/devnet/templates/monitoring/grafana.yaml b/charts/devnet/templates/monitoring/grafana.yaml new file mode 100644 index 000000000..73eadef42 --- /dev/null +++ b/charts/devnet/templates/monitoring/grafana.yaml @@ -0,0 +1,86 @@ +{{- if .Values.monitoring.enabled }} +--- +apiVersion: v1 +kind: ConfigMap +metadata: + name: grafana-datasources +data: + prometheus.yaml: |- + { + "apiVersion": 1, + "datasources": [ + { + "access":"proxy", + "editable": true, + "name": "prometheus", + "orgId": 1, + "type": "prometheus", + "url": "http://prometheus.aws-starship.svc:8080", + "version": 1 + } + ] + } +--- +apiVersion: v1 +kind: Service +metadata: + name: grafana + annotations: + prometheus.io/scrape: 'true' + prometheus.io/port: '8080' +spec: + clusterIP: None + ports: + - name: http + port: 8080 + protocol: TCP + targetPort: 3000 + selector: + app.kubernetes.io/name: grafana +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: grafana +spec: + replicas: 1 + selector: + matchLabels: + app.kubernetes.io/name: grafana + template: + metadata: + name: grafana + labels: + app.kubernetes.io/instance: monitoring + app.kubernetes.io/name: grafana + spec: + containers: + - name: grafana + image: grafana/grafana:latest + env: + - name: GF_AUTH_DISABLE_LOGIN_FORM 
+ value: "true" + - name: GF_AUTH_ANONYMOUS_ENABLED + value: "true" + - name: GF_AUTH_ANONYMOUS_ORG_NAME + value: "Main Org." + - name: GF_AUTH_ANONYMOUS_ORG_ROLE + value: "Editor" + ports: + - name: grafana + containerPort: 3000 + volumeMounts: + - mountPath: /var/lib/grafana + name: grafana-storage + - mountPath: /etc/grafana/provisioning/datasources + name: grafana-datasources + readOnly: false + volumes: + - name: grafana-datasources + configMap: + defaultMode: 420 + name: grafana-datasources + - name: grafana-storage + emptyDir: { } +--- +{{- end }} \ No newline at end of file diff --git a/charts/devnet/templates/monitoring/prometheus.yaml b/charts/devnet/templates/monitoring/prometheus.yaml new file mode 100644 index 000000000..bba760c26 --- /dev/null +++ b/charts/devnet/templates/monitoring/prometheus.yaml @@ -0,0 +1,95 @@ +{{- if .Values.monitoring.enabled }} +--- +apiVersion: v1 +kind: ConfigMap +metadata: + name: prometheus-config +data: + prometheus.yml: | + # my global config + global: + scrape_interval: 15s # Set the scrape interval to every 15 seconds. Default is every 1 minute. + evaluation_interval: 15s # Evaluate rules every 15 seconds. The default is every 1 minute. + # scrape_timeout is set to the global default (10s). + # Alertmanager configuration + alerting: + alertmanagers: + - static_configs: + - targets: + # - alertmanager:9093 + # Load rules once and periodically evaluate them according to the global 'evaluation_interval'. + rule_files: + # - "first_rules.yml" + # - "second_rules.yml" + # A scrape configuration containing exactly one endpoint to scrape: + # Here it's Prometheus itself. + scrape_configs: + # The job name is added as a label `job=` to any timeseries scraped from this config. + - job_name: 'prometheus' + # metrics_path defaults to '/metrics' + # scheme defaults to 'http'. 
+ static_configs: + - targets: ['localhost:9090'] +--- +apiVersion: v1 +kind: Service +metadata: + name: prometheus + annotations: + prometheus.io/scrape: 'true' + prometheus.io/port: '9090' +spec: + clusterIP: None + ports: + - name: http + port: 8080 + protocol: TCP + targetPort: 9090 + selector: + app.kubernetes.io/name: prometheus +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: prometheus + labels: + app.kubernetes.io/name: prometheus +spec: + replicas: 1 + selector: + matchLabels: + app.kubernetes.io/name: prometheus + template: + metadata: + labels: + app.kubernetes.io/instance: monitoring + app.kubernetes.io/name: prometheus + annotations: + prometheus.io/scrape: "true" + prometheus.io/port: "8080" + spec: + containers: + - name: prometheus + image: prom/prometheus + args: + - '--storage.tsdb.retention=6h' + - '--storage.tsdb.path=/prometheus' + - '--config.file=/etc/prometheus/prometheus.yml' + ports: + - name: web + containerPort: 9090 + volumeMounts: + - name: prometheus-config-volume + mountPath: /etc/prometheus + - name: prometheus-storage-volume + mountPath: /prometheus + restartPolicy: Always + volumes: + - name: prometheus-config-volume + configMap: + defaultMode: 420 + name: prometheus-config + - name: prometheus-storage-volume + emptyDir: { } +--- +{{- end }} \ No newline at end of file diff --git a/charts/devnet/values.schema.json b/charts/devnet/values.schema.json index 22ee6c7e9..e8ea4dfb1 100644 --- a/charts/devnet/values.schema.json +++ b/charts/devnet/values.schema.json @@ -605,6 +605,33 @@ "required": [ "enabled" ] + }, + "monitoring": { + "type": "object", + "properties": { + "enabled": { + "type": "boolean" + }, + "ports": { + "type": "object", + "properties": { + "prometheus": { + "type": "number" + }, + "grafana": { + "type": "number" + } + }, + "required": [ + "grafana" + ] + }, + "resources": { "$ref": "#/$def/resources" } + }, + "additionalProperties": false, + "required": [ + "enabled" + ] } }, "required": [ diff --git 
a/charts/devnet/values.yaml b/charts/devnet/values.yaml index 9a0dda78d..3793d5c05 100644 --- a/charts/devnet/values.yaml +++ b/charts/devnet/values.yaml @@ -227,3 +227,14 @@ faucet: resources: cpu: "0.2" memory: "200M" + +# monitoring directive is used to setup prometheus and grafana dashboard, +# connected to all chains, relayers and k8s apis itself +monitoring: + enabled: false + ports: + prometheus: 8011 + grafana: 9011 + resources: + cpu: "0.2" + memory: "200M" diff --git a/tests/e2e/configs/two-chain-monitoring.yaml b/tests/e2e/configs/two-chain-monitoring.yaml new file mode 100644 index 000000000..80506469e --- /dev/null +++ b/tests/e2e/configs/two-chain-monitoring.yaml @@ -0,0 +1,50 @@ +chains: + - name: osmosis-1 + type: osmosis + numValidators: 2 + metrics: true + faucet: + enabled: false + ports: + rest: 1313 + rpc: 26653 + exposer: 38083 + faucet: 8001 + - name: cosmoshub-4 + type: cosmos + image: ghcr.io/cosmology-tech/starship/gaia:v10.0.1 + numValidators: 2 + metrics: true + faucet: + enabled: false + ports: + rest: 1317 + rpc: 26657 + exposer: 38087 + +relayers: + - name: osmos-cosmos + type: hermes + image: ghcr.io/cosmology-tech/starship/hermes:1.6.0 # todo: replace this with ghcr.io image after merge + replicas: 1 + chains: + - osmosis-1 + - cosmoshub-4 + config: + telemetry: + enabled: true + +registry: + enabled: true + ports: + rest: 8081 + grpc: 9091 + +exposer: + image: anmol1696/exposer:20231011-1f81895 + +monitoring: + enabled: true + ports: + grafana: 8082 + prometheus: 8083 \ No newline at end of file From 884f31d269f82e4d46ae8ba143c5a3299224dd30 Mon Sep 17 00:00:00 2001 From: Anmol1696 Date: Mon, 16 Oct 2023 18:34:52 +0530 Subject: [PATCH 02/15] add prometheus ports to service, set grafana port and addr in env variables --- .../devnet/templates/chains/cosmos/service.yaml | 17 ++++++++++++++++- charts/devnet/templates/monitoring/grafana.yaml | 9 ++++++--- .../devnet/templates/monitoring/prometheus.yaml | 4 ++-- scripts/port-forward.sh | 
13 +++++++++++++ tests/e2e/Makefile | 2 +- 5 files changed, 38 insertions(+), 7 deletions(-) diff --git a/charts/devnet/templates/chains/cosmos/service.yaml b/charts/devnet/templates/chains/cosmos/service.yaml index e3e4702fb..c5d21d974 100644 --- a/charts/devnet/templates/chains/cosmos/service.yaml +++ b/charts/devnet/templates/chains/cosmos/service.yaml @@ -1,11 +1,14 @@ {{ $portMap := dict "p2p" 26656 "address" 26658 "grpc" 9090 "rest" 1317 }} {{- range $chain := .Values.chains }} {{- if ne $chain.type "virtual" }} +{{ $defaultFile := $.Files.Get "defaults.yaml" | fromYaml }} + +{{ $chain := include "devnet.fullchain" (dict "name" $chain.name "file" $defaultFile "context" $) | fromJson }} --- apiVersion: v1 kind: Service metadata: - name: {{ include "devnet.chain.name" $chain.name }}-genesis + name: {{ $chain.hostname }}-genesis labels: app.kubernetes.io/name: {{ $chain.name }}-genesis spec: @@ -21,6 +24,12 @@ spec: port: 26657 protocol: TCP targetPort: 26657 + {{- if $chain.metrics }} + - name: metrics + port: 26660 + protocol: TCP + targetPort: 26660 + {{- end }} - name: exposer port: {{ $.Values.exposer.ports.rest | default 8081 }} protocol: TCP @@ -53,6 +62,12 @@ spec: port: 26657 protocol: TCP targetPort: 26657 + {{- if $chain.metrics }} + - name: metrics + port: 26660 + protocol: TCP + targetPort: 26660 + {{- end }} - name: exposer port: {{ $.Values.exposer.ports.rest | default 8081 }} protocol: TCP diff --git a/charts/devnet/templates/monitoring/grafana.yaml b/charts/devnet/templates/monitoring/grafana.yaml index 73eadef42..136cec5c9 100644 --- a/charts/devnet/templates/monitoring/grafana.yaml +++ b/charts/devnet/templates/monitoring/grafana.yaml @@ -15,7 +15,7 @@ data: "name": "prometheus", "orgId": 1, "type": "prometheus", - "url": "http://prometheus.aws-starship.svc:8080", + "url": "http://prometheus.aws-starship.svc:9090", "version": 1 } ] @@ -33,8 +33,7 @@ spec: ports: - name: http port: 8080 - protocol: TCP - targetPort: 3000 + targetPort: 8080 
selector: app.kubernetes.io/name: grafana --- @@ -58,6 +57,10 @@ spec: - name: grafana image: grafana/grafana:latest env: + - name: GF_SERVER_HTTP_PORT + value: "8080" + - name: GF_SERVER_HTTP_ADDR + value: "0.0.0.0" - name: GF_AUTH_DISABLE_LOGIN_FORM value: "true" - name: GF_AUTH_ANONYMOUS_ENABLED diff --git a/charts/devnet/templates/monitoring/prometheus.yaml b/charts/devnet/templates/monitoring/prometheus.yaml index bba760c26..1d55b29ff 100644 --- a/charts/devnet/templates/monitoring/prometheus.yaml +++ b/charts/devnet/templates/monitoring/prometheus.yaml @@ -42,7 +42,7 @@ spec: clusterIP: None ports: - name: http - port: 8080 + port: 9090 protocol: TCP targetPort: 9090 selector: @@ -66,7 +66,7 @@ spec: app.kubernetes.io/name: prometheus annotations: prometheus.io/scrape: "true" - prometheus.io/port: "8080" + prometheus.io/port: "9090" spec: containers: - name: prometheus diff --git a/scripts/port-forward.sh b/scripts/port-forward.sh index 071741da0..d37b1da1e 100755 --- a/scripts/port-forward.sh +++ b/scripts/port-forward.sh @@ -29,6 +29,8 @@ CHAIN_FAUCET_PORT=8000 EXPLORER_LCD_PORT=8080 REGISTRY_LCD_PORT=8080 REGISTRY_GRPC_PORT=9090 +MONITORING_PROMETHEUS_PORT=8080 +MONITORING_GRAFANA_PORT=8080 for i in "$@"; do case $i in @@ -94,3 +96,14 @@ then sleep 1 color green "Open the explorer to get started.... 
http://localhost:8080" fi + +if [[ $(yq -r ".monitoring.enabled" $CONFIGFILE) == "true" ]]; +then + color yellow "monitoring port forward:" + localgrafana=$(yq -r ".monitoring.ports.grafana" ${CONFIGFILE}) + localprometheus=$(yq -r ".monitoring.ports.prometheus" ${CONFIGFILE}) + [[ "$localgrafana" != "null" ]] && color yellow " grafana to http://localhost:$localgrafana" && kubectl port-forward service/grafana $localgrafana:$MONITORING_GRAFANA_PORT > /dev/null 2>&1 & + [[ "$localprometheus" != "null" ]] && color yellow " prometheus to http://localhost:$localprometheus" && kubectl port-forward service/prometheus-service $localprometheus:$MONITORING_PROMETHEUS_PORT > /dev/null 2>&1 & + sleep 1 +fi + diff --git a/tests/e2e/Makefile b/tests/e2e/Makefile index 0d2804b6f..a9d772a6c 100644 --- a/tests/e2e/Makefile +++ b/tests/e2e/Makefile @@ -1,5 +1,5 @@ HELM_NAME ?= starship-e2e-tests -HELM_FILE ?= configs/two-chain.yaml +HELM_FILE ?= configs/two-chain-monitoring.yaml HELM_REPO ?= starship HELM_CHART ?= devnet From 3ab0a74ba8026ac9b5cb0c816f7d231c773bfc1a Mon Sep 17 00:00:00 2001 From: Anmol1696 Date: Mon, 16 Oct 2023 19:50:28 +0530 Subject: [PATCH 03/15] remove exposer from test config --- tests/e2e/configs/two-chain-monitoring.yaml | 3 --- 1 file changed, 3 deletions(-) diff --git a/tests/e2e/configs/two-chain-monitoring.yaml b/tests/e2e/configs/two-chain-monitoring.yaml index 80506469e..37ab02a19 100644 --- a/tests/e2e/configs/two-chain-monitoring.yaml +++ b/tests/e2e/configs/two-chain-monitoring.yaml @@ -40,9 +40,6 @@ registry: rest: 8081 grpc: 9091 -exposer: - image: anmol1696/exposer:20231011-1f81895 - monitoring: enabled: true ports: From 8a9c79e6af9819f2fd3d27774976fe17f5d4d9c0 Mon Sep 17 00:00:00 2001 From: Anmol1696 Date: Tue, 17 Oct 2023 08:29:53 +0530 Subject: [PATCH 04/15] update prometheus to use static_configs for fetching metrics endpoints for all chain nodes --- .../templates/monitoring/prometheus.yaml | 36 ++++++++++++------- 1 file changed, 24 
insertions(+), 12 deletions(-) diff --git a/charts/devnet/templates/monitoring/prometheus.yaml b/charts/devnet/templates/monitoring/prometheus.yaml index 1d55b29ff..31749187d 100644 --- a/charts/devnet/templates/monitoring/prometheus.yaml +++ b/charts/devnet/templates/monitoring/prometheus.yaml @@ -1,4 +1,5 @@ {{- if .Values.monitoring.enabled }} +{{ $defaultFile := $.Files.Get "defaults.yaml" | fromYaml }} --- apiVersion: v1 kind: ConfigMap @@ -11,18 +12,6 @@ data: scrape_interval: 15s # Set the scrape interval to every 15 seconds. Default is every 1 minute. evaluation_interval: 15s # Evaluate rules every 15 seconds. The default is every 1 minute. # scrape_timeout is set to the global default (10s). - # Alertmanager configuration - alerting: - alertmanagers: - - static_configs: - - targets: - # - alertmanager:9093 - # Load rules once and periodically evaluate them according to the global 'evaluation_interval'. - rule_files: - # - "first_rules.yml" - # - "second_rules.yml" - # A scrape configuration containing exactly one endpoint to scrape: - # Here it's Prometheus itself. scrape_configs: # The job name is added as a label `job=` to any timeseries scraped from this config. - job_name: 'prometheus' @@ -30,6 +19,29 @@ data: # scheme defaults to 'http'. 
static_configs: - targets: ['localhost:9090'] + {{- range $i, $chain := .Values.chains }} + {{ $fullchain := include "devnet.fullchain" (dict "name" $chain.name "file" $defaultFile "context" $) | fromJson }} + {{- if $fullchain.metrics }} + - job_name: '{{ $fullchain.name }}-genesis' + static_configs: + - targets: ['{{ $fullchain.hostname }}-genesis.{{ $.Release.Namespace }}.svc.cluster.local:26660'] + labels: + node: genesis + type: genesis + chain: "{{ $fullchain.name }}" + {{- if gt $chain.numValidators 1.0}} + {{- range $n, $e := until ($chain.numValidators | int) }} + - job_name: '{{ $fullchain.name }}-validator-{{ $n }}' + static_configs: + - targets: ['{{ $fullchain.hostname }}-validator-{{ $n }}.{{ $fullchain.hostname }}-validator.{{ $.Release.Namespace }}.svc.cluster.local:26660'] + labels: + node: "validator-{{ $n }}" + type: validator + chain: "{{ $fullchain.name }}" + {{- end }} + {{- end }} + {{- end }} + {{- end }} --- apiVersion: v1 kind: Service From c560890ba971f498f9ac0ec25e402f9b42c3be29 Mon Sep 17 00:00:00 2001 From: Anmol1696 Date: Tue, 17 Oct 2023 21:35:27 +0530 Subject: [PATCH 05/15] add k8s level metrics to prometheus config --- .../templates/monitoring/prometheus.yaml | 184 ++++++++++++++++++ 1 file changed, 184 insertions(+) diff --git a/charts/devnet/templates/monitoring/prometheus.yaml b/charts/devnet/templates/monitoring/prometheus.yaml index 31749187d..4001a37fc 100644 --- a/charts/devnet/templates/monitoring/prometheus.yaml +++ b/charts/devnet/templates/monitoring/prometheus.yaml @@ -1,6 +1,40 @@ {{- if .Values.monitoring.enabled }} {{ $defaultFile := $.Files.Get "defaults.yaml" | fromYaml }} --- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + name: prometheus +rules: + - apiGroups: [""] + resources: + - nodes + - nodes/proxy + - services + - endpoints + - pods + verbs: ["get", "list", "watch"] + - apiGroups: + - extensions + resources: + - ingresses + verbs: ["get", "list", "watch"] + - nonResourceURLs: 
["/metrics"] + verbs: ["get"] +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + name: prometheus +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: prometheus +subjects: + - kind: ServiceAccount + name: default + namespace: {{ $.Release.Namespace }} +--- apiVersion: v1 kind: ConfigMap metadata: @@ -14,6 +48,156 @@ data: # scrape_timeout is set to the global default (10s). scrape_configs: # The job name is added as a label `job=` to any timeseries scraped from this config. + - job_name: 'kubernetes-apiservers' + + kubernetes_sd_configs: + - role: endpoints + scheme: https + + tls_config: + ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt + bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token + + relabel_configs: + - source_labels: [__meta_kubernetes_namespace, __meta_kubernetes_service_name, __meta_kubernetes_endpoint_port_name] + action: keep + regex: default;kubernetes;https + + - job_name: 'kubernetes-nodes' + + scheme: https + + tls_config: + ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt + bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token + + kubernetes_sd_configs: + - role: node + + relabel_configs: + - action: labelmap + regex: __meta_kubernetes_node_label_(.+) + - target_label: __address__ + replacement: kubernetes.default.svc:443 + - source_labels: [__meta_kubernetes_node_name] + regex: (.+) + target_label: __metrics_path__ + replacement: /api/v1/nodes/${1}/proxy/metrics + + + - job_name: 'kubernetes-pods' + + kubernetes_sd_configs: + - role: pod + + relabel_configs: + - source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_scrape] + action: keep + regex: true + - source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_path] + action: replace + target_label: __metrics_path__ + regex: (.+) + - source_labels: [__address__, __meta_kubernetes_pod_annotation_prometheus_io_port] + action: replace + regex: 
([^:]+)(?::\d+)?;(\d+) + replacement: $1:$2 + target_label: __address__ + - action: labelmap + regex: __meta_kubernetes_pod_label_(.+) + - source_labels: [__meta_kubernetes_namespace] + action: replace + target_label: kubernetes_namespace + - source_labels: [__meta_kubernetes_pod_name] + action: replace + target_label: kubernetes_pod_name + + - job_name: 'kube-state-metrics' + static_configs: + - targets: ['kube-state-metrics.kube-system.svc.cluster.local:8080'] + +{{/* - job_name: 'kubernetes-cadvisor'*/}} + +{{/* scheme: https*/}} + +{{/* tls_config:*/}} +{{/* ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt*/}} +{{/* bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token*/}} + +{{/* kubernetes_sd_configs:*/}} +{{/* - role: node*/}} + +{{/* relabel_configs:*/}} +{{/* - action: labelmap*/}} +{{/* regex: __meta_kubernetes_node_label_(.+)*/}} +{{/* - target_label: __address__*/}} +{{/* replacement: kubernetes.default.svc:443*/}} +{{/* - source_labels: [__meta_kubernetes_node_name]*/}} +{{/* regex: (.+)*/}} +{{/* target_label: __metrics_path__*/}} +{{/* replacement: /api/v1/nodes/${1}/proxy/metrics/cadvisor*/}} + - job_name: kubernetes-nodes-cadvisor + scrape_interval: 10s + scrape_timeout: 10s + scheme: https # remove if you want to scrape metrics on insecure port + tls_config: + ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt + bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token + kubernetes_sd_configs: + - role: node + relabel_configs: + - action: labelmap + regex: __meta_kubernetes_node_label_(.+) + # Only for Kubernetes ^1.7.3. 
+ # See: https://github.com/prometheus/prometheus/issues/2916 + - target_label: __address__ + replacement: kubernetes.default.svc:443 + - source_labels: [__meta_kubernetes_node_name] + regex: (.+) + target_label: __metrics_path__ + replacement: /api/v1/nodes/${1}/proxy/metrics/cadvisor + metric_relabel_configs: + - action: replace + source_labels: [id] + regex: '^/machine\.slice/machine-rkt\\x2d([^\\]+)\\.+/([^/]+)\.service$' + target_label: rkt_container_name + replacement: '${2}-${1}' + - action: replace + source_labels: [id] + regex: '^/system\.slice/(.+)\.service$' + target_label: systemd_service_name + replacement: '${1}' + + - job_name: 'kubernetes-service-endpoints' + + kubernetes_sd_configs: + - role: endpoints + + relabel_configs: + - source_labels: [__meta_kubernetes_service_annotation_prometheus_io_scrape] + action: keep + regex: true + - source_labels: [__meta_kubernetes_service_annotation_prometheus_io_scheme] + action: replace + target_label: __scheme__ + regex: (https?) + - source_labels: [__meta_kubernetes_service_annotation_prometheus_io_path] + action: replace + target_label: __metrics_path__ + regex: (.+) + - source_labels: [__address__, __meta_kubernetes_service_annotation_prometheus_io_port] + action: replace + target_label: __address__ + regex: ([^:]+)(?::\d+)?;(\d+) + replacement: $1:$2 + - action: labelmap + regex: __meta_kubernetes_service_label_(.+) + - source_labels: [__meta_kubernetes_namespace] + action: replace + target_label: kubernetes_namespace + - source_labels: [__meta_kubernetes_service_name] + action: replace + target_label: kubernetes_name - job_name: 'prometheus' # metrics_path defaults to '/metrics' # scheme defaults to 'http'. 
From e2fd6504b636bcff6ff2540230564441ea2dd9f1 Mon Sep 17 00:00:00 2001 From: Anmol1696 Date: Wed, 18 Oct 2023 18:17:03 +0530 Subject: [PATCH 06/15] add grafana dashboard for chains and tendermint dashboard --- .../configs/grafana-dashboards/chains.json | 2277 +++++++++++++++++ .../devnet/templates/monitoring/grafana.yaml | 44 + .../templates/monitoring/prometheus.yaml | 10 +- 3 files changed, 2326 insertions(+), 5 deletions(-) create mode 100644 charts/devnet/configs/grafana-dashboards/chains.json diff --git a/charts/devnet/configs/grafana-dashboards/chains.json b/charts/devnet/configs/grafana-dashboards/chains.json new file mode 100644 index 000000000..b61a02ead --- /dev/null +++ b/charts/devnet/configs/grafana-dashboards/chains.json @@ -0,0 +1,2277 @@ +{ + "__inputs": [], + "__elements": {}, + "__requires": [ + { + "type": "panel", + "id": "bargauge", + "name": "Bar gauge", + "version": "" + }, + { + "type": "grafana", + "id": "grafana", + "name": "Grafana", + "version": "10.0.0" + }, + { + "type": "panel", + "id": "graph", + "name": "Graph (old)", + "version": "" + }, + { + "type": "datasource", + "id": "prometheus", + "name": "Prometheus", + "version": "1.0.0" + }, + { + "type": "panel", + "id": "stat", + "name": "Stat", + "version": "" + }, + { + "type": "panel", + "id": "text", + "name": "Text", + "version": "" + }, + { + "type": "panel", + "id": "timeseries", + "name": "Time series", + "version": "" + } + ], + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "type": "dashboard" + } + ] + }, + "editable": true, + "fiscalYearStartMonth": 0, + "graphTooltip": 0, + "id": null, + "links": [], + "liveNow": false, + "panels": [ + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 3 + }, + "id": 37, + "panels": [], + "title": "Tendermint Metrics", + "type": "row" + 
}, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "links": [], + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "none" + }, + "overrides": [ + { + "matcher": { + "id": "byFrameRefID", + "options": "C" + }, + "properties": [ + { + "id": "custom.drawStyle", + "value": "line" + }, + { + "id": "custom.axisPlacement", + "value": "right" + }, + { + "id": "unit", + "value": "percent" + }, + { + "id": "custom.lineWidth", + "value": 1 + }, + { + "id": "custom.fillOpacity", + "value": 10 + }, + { + "id": "max" + }, + { + "id": "color", + "value": { + "fixedColor": "orange", + "mode": "fixed" + } + }, + { + "id": "custom.axisColorMode", + "value": "series" + } + ] + }, + { + "matcher": { + "id": "byFrameRefID", + "options": "A" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "yellow", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byFrameRefID", + "options": "B" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "green", + "mode": "fixed" + } + } + ] + } + ] + }, + "gridPos": { + "h": 6, + "w": 12, + "x": 0, + "y": 4 + }, + "id": 2, + "links": [], + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, 
+ "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "9.5.3", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "exemplar": false, + "expr": "avg by (network,instance) ({__name__=~\"^(tendermint|cometbft)_consensus_missing_validators\",network=\"$network\",instance=\"$instance\"})", + "instant": false, + "interval": "", + "legendFormat": "missing validators", + "range": true, + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "exemplar": true, + "expr": "avg by (network) ({__name__=~\"^(tendermint|cometbft)_consensus_validators\",network=\"$network\",instance=\"$instance\"})", + "interval": "", + "legendFormat": "total validators", + "range": true, + "refId": "B" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "exemplar": true, + "expr": "avg by (network) ({__name__=~\"^(tendermint|cometbft)_consensus_missing_validators$\",network=\"$network\",instance=\"$instance\"}) / avg by (network) ({__name__=~\"^(tendermint|cometbft)_consensus_validators$\",network=\"$network\",instance=\"$instance\"}) * 100", + "hide": false, + "interval": "", + "legendFormat": "missing %", + "range": true, + "refId": "C" + } + ], + "title": "Number of Validators", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", 
+ "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "links": [], + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "none" + }, + "overrides": [ + { + "matcher": { + "id": "byFrameRefID", + "options": "C" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "yellow", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byFrameRefID", + "options": "A" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "blue", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byFrameRefID", + "options": "D" + }, + "properties": [ + { + "id": "custom.axisPlacement", + "value": "right" + }, + { + "id": "unit", + "value": "percent" + }, + { + "id": "custom.axisColorMode", + "value": "series" + }, + { + "id": "color", + "value": { + "fixedColor": "orange", + "mode": "fixed" + } + } + ] + } + ] + }, + "gridPos": { + "h": 6, + "w": 12, + "x": 12, + "y": 4 + }, + "id": 12, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "9.5.3", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "expr": "max by (network,instance) ({__name__=~\"^(tendermint|cometbft)_consensus_validators_power\",network=\"$network\",instance=\"$instance\"})", + "hide": false, + "legendFormat": "Total", + "range": true, + "refId": "B" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "exemplar": true, + "expr": "max by (network,instance) ({__name__=~\"^(tendermint|cometbft)_consensus_validator_power\",network=\"$network\",instance=\"$instance\"})", + "hide": true, + "interval": "1s", + 
"legendFormat": "Validator", + "range": true, + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "expr": "max by (network,instance) ({__name__=~\"^(tendermint|cometbft)_consensus_missing_validators_power\",network=\"$network\",instance=\"$instance\"})", + "hide": false, + "legendFormat": "Missing", + "range": true, + "refId": "C" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "expr": "max by (network,instance) ({__name__=~\"^(tendermint|cometbft)_consensus_missing_validators_power$\",network=\"$network\",instance=\"$instance\"}) / max by (network,instance) ({__name__=~\"^(tendermint|cometbft)_consensus_validators_power$\",network=\"$network\",instance=\"$instance\"}) * 100", + "hide": false, + "legendFormat": "Missing %", + "range": true, + "refId": "D" + } + ], + "title": "Voting power", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "blue", + "value": null + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 6, + "w": 12, + "x": 0, + "y": 10 + }, + "id": 8, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + 
"showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "8.3.6", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "exemplar": true, + "expr": "max by (instance) ({__name__=~\"^(tendermint|cometbft)_consensus_latest_block_height\",network=\"$network\",instance=\"$instance\"})", + "format": "time_series", + "instant": false, + "interval": "1s", + "legendFormat": "{{instance}}", + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "expr": "max by (network) ({__name__=~\"^(tendermint|cometbft)_consensus_validator_last_signed_height\",network=\"$network\",instance=\"$instance\"})", + "hide": false, + "legendFormat": "Validator last signed", + "range": true, + "refId": "B" + } + ], + "title": "Consensus Height", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "fixedColor": "green", + "mode": "fixed" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 6, + "w": 12, + "x": 12, + "y": 10 + }, + "id": 14, + "options": { + "legend": { + "calcs": 
[], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "9.5.3", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "exemplar": true, + "expr": "{__name__=~\"^(tendermint|cometbft)_p2p_peers\",network=\"$network\",instance=\"$instance\"}", + "interval": "1s", + "legendFormat": "peers {{instance}}", + "queryType": "randomWalk", + "range": true, + "refId": "A" + } + ], + "title": "P2P connections", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "links": [], + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "s" + }, + "overrides": [ + { + "matcher": { + "id": "byFrameRefID", + "options": "B" + }, + "properties": [ + { + "id": "custom.axisPlacement", + "value": "right" + }, + { + "id": "custom.axisColorMode", + "value": "series" + } + ] + }, + { + "matcher": { + "id": "byFrameRefID", + "options": "A" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "orange", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byFrameRefID", + "options": "D" + }, + 
"properties": [ + { + "id": "color", + "value": { + "fixedColor": "green", + "mode": "fixed" + } + } + ] + } + ] + }, + "gridPos": { + "h": 6, + "w": 12, + "x": 0, + "y": 16 + }, + "id": 18, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "9.5.3", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "exemplar": true, + "expr": "avg by (network) (rate({__name__=~\"^(tendermint|cometbft)_state_block_processing_time_sum\",network=\"$network\",instance=\"$instance\"}[1m]))", + "hide": true, + "interval": "", + "legendFormat": "block processing time (1m)", + "range": true, + "refId": "B" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "expr": "avg by (network) (1/rate({__name__=~\"^(tendermint|cometbft)_consensus_height\",network=\"$network\",instance=\"$instance\"}[1m]))", + "hide": false, + "legendFormat": "1m", + "range": true, + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "expr": "avg by (network) (1/rate({__name__=~\"^(tendermint|cometbft)_consensus_height\",network=\"$network\",instance=\"$instance\"}[1h]))", + "hide": false, + "legendFormat": "1h", + "range": true, + "refId": "C" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "expr": "avg by (network) (1/rate({__name__=~\"^(tendermint|cometbft)_consensus_height\",network=\"$network\",instance=\"$instance\"}[1d]))", + "hide": false, + "legendFormat": "1d", + "range": true, + "refId": "D" + } + ], + "title": "Block Time", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + 
"custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "line" + } + }, + "links": [], + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "transparent", + "value": null + } + ] + }, + "unit": "short" + }, + "overrides": [ + { + "matcher": { + "id": "byFrameRefID", + "options": "A" + }, + "properties": [ + { + "id": "custom.axisPlacement", + "value": "right" + }, + { + "id": "custom.axisColorMode", + "value": "series" + } + ] + }, + { + "matcher": { + "id": "byFrameRefID", + "options": "B" + }, + "properties": [ + { + "id": "custom.drawStyle", + "value": "line" + }, + { + "id": "custom.fillOpacity", + "value": 20 + }, + { + "id": "custom.lineWidth", + "value": 1 + }, + { + "id": "custom.axisColorMode", + "value": "series" + } + ] + } + ] + }, + "gridPos": { + "h": 6, + "w": 12, + "x": 12, + "y": 16 + }, + "id": 25, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "8.5.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "exemplar": true, + "expr": "max by (network) ({__name__=~\"^(tendermint|cometbft)_consensus_validator_missed_blocks\",network=\"$network\",instance=\"$instance\"})", + "interval": "", + "legendFormat": "total", + "range": true, + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + 
"editorMode": "code", + "expr": "max by (network) (delta({__name__=~\"^(tendermint|cometbft)_consensus_validator_missed_blocks\",network=\"$network\",instance=\"$instance\"}[1h]))", + "hide": false, + "legendFormat": "1h", + "range": true, + "refId": "B" + } + ], + "title": "Validator Missed Blocks", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "fixedColor": "green", + "mode": "fixed" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "links": [], + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "short" + }, + "overrides": [ + { + "matcher": { + "id": "byFrameRefID", + "options": "B" + }, + "properties": [ + { + "id": "custom.axisPlacement", + "value": "right" + }, + { + "id": "unit", + "value": "bytes" + }, + { + "id": "min" + }, + { + "id": "color", + "value": { + "fixedColor": "yellow", + "mode": "fixed" + } + }, + { + "id": "custom.axisColorMode", + "value": "series" + } + ] + } + ] + }, + "gridPos": { + "h": 6, + "w": 12, + "x": 0, + "y": 22 + }, + "id": 6, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "9.5.3", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": 
"prometheus" + }, + "editorMode": "code", + "exemplar": true, + "expr": "{__name__=~\"^(tendermint|cometbft)_mempool_size\",network=\"$network\",instance=\"$instance\"}", + "interval": "1s", + "legendFormat": "{{instance}} - Total Txs", + "range": true, + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "exemplar": true, + "expr": "rate({__name__=~\"^(tendermint|cometbft)_mempool_tx_size_bytes_sum\",network=\"$network\",instance=\"$instance\"}[1m])", + "hide": false, + "interval": "1s", + "legendFormat": "{{instance}} - Tx Size Inflow (1min)", + "range": true, + "refId": "B" + } + ], + "title": "Mempool", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "fixedColor": "green", + "mode": "fixed" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "links": [], + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "short" + }, + "overrides": [ + { + "matcher": { + "id": "byFrameRefID", + "options": "B" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "yellow", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byFrameRefID", + "options": "C" + }, + "properties": [ + { + "id": "custom.axisPlacement", + "value": "right" + }, + { + "id": "unit", + 
"value": "percent" + }, + { + "id": "color", + "value": { + "fixedColor": "orange", + "mode": "fixed" + } + }, + { + "id": "custom.drawStyle", + "value": "line" + }, + { + "id": "custom.pointSize", + "value": 1 + }, + { + "id": "custom.axisColorMode", + "value": "series" + } + ] + } + ] + }, + "gridPos": { + "h": 6, + "w": 12, + "x": 12, + "y": 22 + }, + "id": 56, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "9.5.3", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "exemplar": true, + "expr": "increase({__name__=~\"^(tendermint|cometbft)_consensus_total_txs\",network=\"$network\",instance=\"$instance\"}[1m])", + "interval": "1s", + "legendFormat": "Confirmed (1min)", + "range": true, + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "exemplar": true, + "expr": "increase({__name__=~\"^(tendermint|cometbft)_mempool_failed_txs\",network=\"$network\",instance=\"$instance\"}[1m])", + "hide": false, + "interval": "1s", + "legendFormat": "Failed (1min)", + "range": true, + "refId": "B" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "exemplar": true, + "expr": "increase({__name__=~\"^(tendermint|cometbft)_mempool_failed_txs$\",network=\"$network\",instance=\"$instance\"}[1m]) / (increase({__name__=~\"^(tendermint|cometbft)_mempool_failed_txs$\",network=\"$network\",instance=\"$instance\"}[1m]) + increase({__name__=~\"^(tendermint|cometbft)_consensus_total_txs$\",network=\"$network\",instance=\"$instance\"}[1m])) * 100", + "hide": false, + "interval": "1s", + "legendFormat": "Total % (1min)", + "range": true, + "refId": "C" + } + ], + "title": "Finalized Transactions", + "type": "timeseries" + }, + { + "datasource": { + "type": 
"prometheus", + "uid": "prometheus" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "fixedColor": "green", + "mode": "fixed" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "links": [], + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "none" + }, + "overrides": [ + { + "matcher": { + "id": "byFrameRefID", + "options": "B" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "yellow", + "mode": "fixed" + } + } + ] + } + ] + }, + "gridPos": { + "h": 6, + "w": 12, + "x": 0, + "y": 28 + }, + "id": 57, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "9.5.3", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "exemplar": true, + "expr": "increase({__name__=~\"^(tendermint|cometbft)_consensus_block_gossip_parts_received\",network=\"$network\",matches_current=\"true\",instance=\"$instance\"}[1m])", + "interval": "1s", + "legendFormat": "Parts Received - Matching current (1m)", + "range": true, + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "exemplar": true, + "expr": 
"increase({__name__=~\"^(tendermint|cometbft)_consensus_block_gossip_parts_received\",network=\"$network\",matches_current=\"false\",instance=\"$instance\"}[1m])", + "hide": false, + "interval": "1s", + "legendFormat": "Parts Received - Not matching (1m)", + "range": true, + "refId": "B" + } + ], + "title": "Block Parts", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "fixedColor": "blue", + "mode": "fixed" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "links": [], + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "bytes" + }, + "overrides": [] + }, + "gridPos": { + "h": 6, + "w": 12, + "x": 12, + "y": 28 + }, + "id": 58, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "9.5.3", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "exemplar": true, + "expr": "{__name__=~\"^(tendermint|cometbft)_consensus_block_size_bytes\",network=\"$network\",instance=\"$instance\"}", + "interval": "1s", + "legendFormat": "Block Size", + "range": true, + "refId": "A" + } + ], + "title": "Block Size", + "type": "timeseries" + }, + { + 
"datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "fixedColor": "blue", + "mode": "fixed" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "links": [], + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 12, + "x": 0, + "y": 34 + }, + "id": 59, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "9.5.3", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "exemplar": true, + "expr": "{__name__=~\"^(tendermint|cometbft)_consensus_rounds\",network=\"$network\",instance=\"$instance\"}", + "interval": "1s", + "legendFormat": "Consensus Rounds", + "range": true, + "refId": "A" + } + ], + "title": "Consensus Rounds", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "fixedColor": "blue", + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 
10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "links": [], + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 12, + "x": 12, + "y": 34 + }, + "id": 60, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "9.5.3", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "exemplar": true, + "expr": "increase({__name__=~\"^(tendermint|cometbft)_consensus_step_duration_seconds_sum\",network=\"$network\",step=\"Prevote\",instance=\"$instance\"}[1m]) / 100", + "interval": "1s", + "legendFormat": "Prevote", + "range": true, + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "exemplar": true, + "expr": "increase({__name__=~\"^(tendermint|cometbft)_consensus_step_duration_seconds_sum\",network=\"$network\",step=\"Precommit\",instance=\"$instance\"}[1m]) / 100", + "hide": false, + "interval": "1s", + "legendFormat": "Precommit", + "range": true, + "refId": "B" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "exemplar": true, + "expr": "increase({__name__=~\"^(tendermint|cometbft)_consensus_step_duration_seconds_sum\",network=\"$network\",step=\"Commit\",instance=\"$instance\"}[1m]) / 100", + "hide": false, + "interval": "1s", + "legendFormat": "Commit", + 
"range": true, + "refId": "C" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "exemplar": true, + "expr": "increase({__name__=~\"^(tendermint|cometbft)_consensus_step_duration_seconds_sum\",network=\"$network\",step=\"NewHeight\",instance=\"$instance\"}[1m]) / 100", + "hide": false, + "interval": "1s", + "legendFormat": "New Height", + "range": true, + "refId": "D" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "exemplar": true, + "expr": "increase({__name__=~\"^(tendermint|cometbft)_consensus_step_duration_seconds_sum\",network=\"$network\",step=\"Propose\",instance=\"$instance\"}[1m]) / 100", + "hide": false, + "interval": "1s", + "legendFormat": "Propose", + "range": true, + "refId": "E" + } + ], + "title": "Step duration", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "fixedColor": "blue", + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "links": [], + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "binBps" + }, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 12, + "x": 0, + "y": 39 + }, + "id": 62, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": 
"bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "9.5.3", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "exemplar": true, + "expr": "increase({__name__=~\"^(tendermint|cometbft)_p2p_message_receive_bytes_total\",network=\"$network\",instance=\"$instance\"}[1m]) / 600", + "interval": "1s", + "legendFormat": "{{message_type}}", + "range": true, + "refId": "A" + } + ], + "title": "Bytes received / s", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "fixedColor": "blue", + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "links": [], + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "binBps" + }, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 12, + "x": 12, + "y": 39 + }, + "id": 63, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "9.5.3", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "exemplar": true, + "expr": 
"increase({__name__=~\"^(tendermint|cometbft)_p2p_message_send_bytes_total\",network=\"$network\",instance=\"$instance\"}[1m])/600", + "interval": "1s", + "legendFormat": "{{message_type}}", + "range": true, + "refId": "A" + } + ], + "title": "Bytes sent / s", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "fixedColor": "blue", + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "links": [], + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "bytes" + }, + "overrides": [] + }, + "gridPos": { + "h": 20, + "w": 12, + "x": 0, + "y": 44 + }, + "id": 64, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "9.5.3", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "exemplar": true, + "expr": "increase({__name__=~\"^(tendermint|cometbft)_p2p_message_receive_bytes_total\",network=\"$network\",instance=\"$instance\"}[1d])", + "interval": "1s", + "legendFormat": "{{message_type}}", + "range": true, + "refId": "A" + } + ], + "title": "Bytes received (1d)", + "type": "timeseries" + }, + { + "datasource": { + "type": 
"prometheus", + "uid": "prometheus" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "fixedColor": "blue", + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "links": [], + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "bytes" + }, + "overrides": [] + }, + "gridPos": { + "h": 20, + "w": 12, + "x": 12, + "y": 44 + }, + "id": 65, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "9.5.3", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "exemplar": true, + "expr": "increase({__name__=~\"^(tendermint|cometbft)_p2p_message_send_bytes_total\",network=\"$network\",instance=\"$instance\"}[1d])", + "interval": "1s", + "legendFormat": "{{message_type}}", + "range": true, + "refId": "A" + } + ], + "title": "Bytes sent (1d)", + "type": "timeseries" + } + ], + "refresh": "", + "revision": 1, + "schemaVersion": 38, + "style": "dark", + "tags": [], + "templating": { + "list": [ + { + "current": {}, + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "definition": "label_values({__name__=~\"^(tendermint|cometbft)_consensus_latest_block_height\",network=~\".*\"},network)", + "hide": 0, + 
"includeAll": false, + "multi": false, + "name": "network", + "options": [], + "query": { + "query": "label_values({__name__=~\"^(tendermint|cometbft)_consensus_latest_block_height\",network=~\".*\"},network)", + "refId": "PrometheusVariableQueryEditor-VariableQuery" + }, + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 0, + "type": "query" + }, + { + "current": {}, + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "definition": "label_values(up{network=\"$network\"},instance)", + "hide": 0, + "includeAll": false, + "label": "instance", + "multi": false, + "name": "instance", + "options": [], + "query": { + "query": "label_values(up{network=\"$network\"},instance)", + "refId": "PrometheusVariableQueryEditor-VariableQuery" + }, + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 0, + "type": "query" + } + ] + }, + "time": { + "from": "now-15m", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ] + }, + "timezone": "", + "title": "Chain Dashboard", + "uid": "1jUwLdghefUwnk-starship-chain", + "version": 8, + "weekStart": "" +} \ No newline at end of file diff --git a/charts/devnet/templates/monitoring/grafana.yaml b/charts/devnet/templates/monitoring/grafana.yaml index 136cec5c9..abc363644 100644 --- a/charts/devnet/templates/monitoring/grafana.yaml +++ b/charts/devnet/templates/monitoring/grafana.yaml @@ -22,6 +22,37 @@ data: } --- apiVersion: v1 +kind: ConfigMap +metadata: + name: grafana-dashboard-providers +data: + default.yaml: |- + { + "apiVersion": 1, + "providers": [ + { + "name": "chain-dashboard", + "orgId": 1, + "type": "file", + "allowUiUpdates": true, + "options": { + "path": "/var/lib/grafana/dashboards", + } + } + ] + } +--- +apiVersion: v1 +kind: ConfigMap +metadata: + name: grafana-dashboards +data: + {{- range $path, $_ := .Files.Glob "configs/grafana-dashboards/*.json" }} + {{ base $path }}: |- + {{- 
$.Files.Get $path | nindent 4}} + {{- end }} +--- +apiVersion: v1 kind: Service metadata: name: grafana @@ -78,11 +109,24 @@ spec: - mountPath: /etc/grafana/provisioning/datasources name: grafana-datasources readOnly: false + - mountPath: /etc/grafana/provisioning/dashboards + name: grafana-dashboard-providers + readOnly: false + - mountPath: /var/lib/grafana/dashboards + name: grafana-dashboards + readOnly: false volumes: - name: grafana-datasources configMap: defaultMode: 420 name: grafana-datasources + - name: grafana-dashboard-providers + configMap: + defaultMode: 420 + name: grafana-dashboard-providers + - name: grafana-dashboards + configMap: + name: grafana-dashboards - name: grafana-storage emptyDir: { } --- diff --git a/charts/devnet/templates/monitoring/prometheus.yaml b/charts/devnet/templates/monitoring/prometheus.yaml index 4001a37fc..3e1e43fd1 100644 --- a/charts/devnet/templates/monitoring/prometheus.yaml +++ b/charts/devnet/templates/monitoring/prometheus.yaml @@ -210,18 +210,18 @@ data: static_configs: - targets: ['{{ $fullchain.hostname }}-genesis.{{ $.Release.Namespace }}.svc.cluster.local:26660'] labels: - node: genesis + instance: genesis type: genesis - chain: "{{ $fullchain.name }}" + network: "{{ $fullchain.name }}" {{- if gt $chain.numValidators 1.0}} - {{- range $n, $e := until ($chain.numValidators | int) }} + {{- range $n, $e := until ( sub $chain.numValidators 1 | int) }} - job_name: '{{ $fullchain.name }}-validator-{{ $n }}' static_configs: - targets: ['{{ $fullchain.hostname }}-validator-{{ $n }}.{{ $fullchain.hostname }}-validator.{{ $.Release.Namespace }}.svc.cluster.local:26660'] labels: - node: "validator-{{ $n }}" + instance: "validator-{{ $n }}" type: validator - chain: "{{ $fullchain.name }}" + network: "{{ $fullchain.name }}" {{- end }} {{- end }} {{- end }} From e0ea7e4fc996d4bef7f4aca4a5de21521d5dc8c6 Mon Sep 17 00:00:00 2001 From: Anmol1696 Date: Wed, 18 Oct 2023 18:20:08 +0530 Subject: [PATCH 07/15] add resources to the 
grafana and prometheus deployments --- charts/devnet/templates/monitoring/grafana.yaml | 2 ++ charts/devnet/templates/monitoring/prometheus.yaml | 2 ++ 2 files changed, 4 insertions(+) diff --git a/charts/devnet/templates/monitoring/grafana.yaml b/charts/devnet/templates/monitoring/grafana.yaml index abc363644..be8fad005 100644 --- a/charts/devnet/templates/monitoring/grafana.yaml +++ b/charts/devnet/templates/monitoring/grafana.yaml @@ -103,6 +103,8 @@ spec: ports: - name: grafana containerPort: 3000 + resources: +{{- include "getResourceObject" $.Values.monitoring.resources | indent 12 }} volumeMounts: - mountPath: /var/lib/grafana name: grafana-storage diff --git a/charts/devnet/templates/monitoring/prometheus.yaml b/charts/devnet/templates/monitoring/prometheus.yaml index 3e1e43fd1..18f0867dd 100644 --- a/charts/devnet/templates/monitoring/prometheus.yaml +++ b/charts/devnet/templates/monitoring/prometheus.yaml @@ -274,6 +274,8 @@ spec: ports: - name: web containerPort: 9090 + resources: +{{- include "getResourceObject" $.Values.monitoring.resources | indent 12 }} volumeMounts: - name: prometheus-config-volume mountPath: /etc/prometheus From 9b88d74d81913ae26e2968eeb27a7fe62604e019 Mon Sep 17 00:00:00 2001 From: Anmol1696 Date: Wed, 18 Oct 2023 19:12:36 +0530 Subject: [PATCH 08/15] increase default memory for monitoring deployments --- charts/devnet/values.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/charts/devnet/values.yaml b/charts/devnet/values.yaml index ddd12f70b..e091c7ff1 100644 --- a/charts/devnet/values.yaml +++ b/charts/devnet/values.yaml @@ -237,4 +237,4 @@ monitoring: grafana: 9011 resources: cpu: "0.2" - memory: "200M" + memory: "400M" From 798e5937f0def2e4a1aa114bb0cc9b7162b3c478 Mon Sep 17 00:00:00 2001 From: Anmol1696 Date: Wed, 18 Oct 2023 19:17:55 +0530 Subject: [PATCH 09/15] add newline in the end --- charts/devnet/templates/monitoring/grafana.yaml | 2 +- charts/devnet/templates/monitoring/prometheus.yaml | 2 +- 2 
files changed, 2 insertions(+), 2 deletions(-) diff --git a/charts/devnet/templates/monitoring/grafana.yaml b/charts/devnet/templates/monitoring/grafana.yaml index be8fad005..978a9c98a 100644 --- a/charts/devnet/templates/monitoring/grafana.yaml +++ b/charts/devnet/templates/monitoring/grafana.yaml @@ -132,4 +132,4 @@ spec: - name: grafana-storage emptyDir: { } --- -{{- end }} \ No newline at end of file +{{- end }} diff --git a/charts/devnet/templates/monitoring/prometheus.yaml b/charts/devnet/templates/monitoring/prometheus.yaml index 18f0867dd..06c770291 100644 --- a/charts/devnet/templates/monitoring/prometheus.yaml +++ b/charts/devnet/templates/monitoring/prometheus.yaml @@ -290,4 +290,4 @@ spec: - name: prometheus-storage-volume emptyDir: { } --- -{{- end }} \ No newline at end of file +{{- end }} From f6509a8fb110317d5632b00bed1558ead20aa027 Mon Sep 17 00:00:00 2001 From: Anmol1696 Date: Wed, 18 Oct 2023 19:20:09 +0530 Subject: [PATCH 10/15] fix newlines at end of file --- charts/devnet/configs/grafana-dashboards/chains.json | 2 +- scripts/port-forward.sh | 1 - tests/e2e/configs/two-chain-monitoring.yaml | 2 +- 3 files changed, 2 insertions(+), 3 deletions(-) diff --git a/charts/devnet/configs/grafana-dashboards/chains.json b/charts/devnet/configs/grafana-dashboards/chains.json index b61a02ead..f1dab7bee 100644 --- a/charts/devnet/configs/grafana-dashboards/chains.json +++ b/charts/devnet/configs/grafana-dashboards/chains.json @@ -2274,4 +2274,4 @@ "uid": "1jUwLdghefUwnk-starship-chain", "version": 8, "weekStart": "" -} \ No newline at end of file +} diff --git a/scripts/port-forward.sh b/scripts/port-forward.sh index d37b1da1e..dbe837c18 100755 --- a/scripts/port-forward.sh +++ b/scripts/port-forward.sh @@ -106,4 +106,3 @@ then [[ "$localprometheus" != "null" ]] && color yellow " prometheus to http://localhost:$localprometheus" && kubectl port-forward service/prometheus-service $localprometheus:$MONITORING_PROMETHEUS_PORT > /dev/null 2>&1 & sleep 1 fi - 
diff --git a/tests/e2e/configs/two-chain-monitoring.yaml b/tests/e2e/configs/two-chain-monitoring.yaml index 37ab02a19..c4fe64008 100644 --- a/tests/e2e/configs/two-chain-monitoring.yaml +++ b/tests/e2e/configs/two-chain-monitoring.yaml @@ -44,4 +44,4 @@ monitoring: enabled: true ports: grafana: 8082 - prometheus: 8083 \ No newline at end of file + prometheus: 8083 From 9bbad69c72f8d7e1b4f52c91a6c0bdc0afb433a8 Mon Sep 17 00:00:00 2001 From: Anmol1696 Date: Wed, 18 Oct 2023 20:08:35 +0530 Subject: [PATCH 11/15] update starship cli for connecting with the monitoring ports --- cmd/starship/connect.go | 21 +++++++++++++++++++-- cmd/starship/model.go | 25 ++++++++++++++++--------- 2 files changed, 35 insertions(+), 11 deletions(-) diff --git a/cmd/starship/connect.go b/cmd/starship/connect.go index 8d30368fb..64ae44459 100644 --- a/cmd/starship/connect.go +++ b/cmd/starship/connect.go @@ -29,6 +29,10 @@ var defaultPorts = map[string]map[string]int{ "rest": 8080, "grpc": 9090, }, + "monitoring": { + "prometheus": 8080, + "grafana": 8080, + }, } // portForward function with perform port-forwarding based on @@ -70,7 +74,7 @@ func (c *Client) PortForwardCmds() ([]*exec.Cmd, []string, error) { } } // port-forward explorer - if config.Explorer != nil { + if config.Explorer != nil && config.Explorer.Enabled { for portType, remotePort := range defaultPorts["explorer"] { port := config.Explorer.Ports.GetPort(portType) if port == 0 { @@ -81,7 +85,7 @@ func (c *Client) PortForwardCmds() ([]*exec.Cmd, []string, error) { } } // port-forward registry - if config.Registry != nil { + if config.Registry != nil && config.Registry.Enabled { for portType, remotePort := range defaultPorts["registry"] { port := config.Registry.Ports.GetPort(portType) if port == 0 { @@ -91,6 +95,19 @@ func (c *Client) PortForwardCmds() ([]*exec.Cmd, []string, error) { cmds = append(cmds, c.execPortForwardCmd("svc/registry", port, remotePort)) } } + // port-forward monitoring + if config.Monitoring != nil && 
config.Monitoring.Enabled { + for portType, remotePort := range defaultPorts["monitoring"] { + port := config.Monitoring.Ports.GetPort(portType) + if port == 0 { + continue + } + msgs = append(msgs, fmt.Sprintf("port-forwarding: %s: port %s: to: http://localhost:%d", "monitoring", portType, port)) + // monitoring is backed by 2 underlying services, prometheus and grafana; + // the portType doubles as the service name, so select the service by it + cmds = append(cmds, c.execPortForwardCmd(fmt.Sprintf("svc/%s", portType), port, remotePort)) + } + } return cmds, msgs, nil } diff --git a/cmd/starship/model.go b/cmd/starship/model.go index 462e8fbb0..f6ff8403b 100644 --- a/cmd/starship/model.go +++ b/cmd/starship/model.go @@ -46,11 +46,13 @@ type Upgrade struct { } type Port struct { - Rest int `name:"rest" json:"rest" yaml:"rest"` - Rpc int `name:"rpc" json:"rpc" yaml:"rpc"` - Grpc int `name:"grpc" json:"grpc" yaml:"grpc"` - Exposer int `name:"exposer" json:"exposer" yaml:"exposer"` - Faucet int `name:"faucet" json:"faucet" yaml:"faucet"` + Rest int `name:"rest" json:"rest" yaml:"rest"` + Rpc int `name:"rpc" json:"rpc" yaml:"rpc"` + Grpc int `name:"grpc" json:"grpc" yaml:"grpc"` + Exposer int `name:"exposer" json:"exposer" yaml:"exposer"` + Faucet int `name:"faucet" json:"faucet" yaml:"faucet"` + Prometheus int `name:"prometheus" json:"prometheus" yaml:"prometheus"` + Grafana int `name:"grafana" json:"grafana" yaml:"grafana"` } func (p Port) GetPort(port string) int { @@ -65,6 +67,10 @@ func (p Port) GetPort(port string) int { return p.Exposer case "faucet": return p.Faucet + case "prometheus": + return p.Prometheus + case "grafana": + return p.Grafana default: return 0 } @@ -98,10 +104,11 @@ func (f *Feature) GetRESTAddr() string { // todo: move this to a more common place, outside just tests // todo: can be moved to proto defination type HelmConfig struct { - Chains []*Chain `name:"chains" json:"chains" yaml:"chains"` - Relayers []*Relayer `name:"relayers" json:"relayers" yaml:"relayers"` - 
Explorer *Feature `name:"explorer" json:"explorer" yaml:"explorer"` - Registry *Feature `name:"registry" json:"registry" yaml:"registry"` + Chains []*Chain `name:"chains" json:"chains" yaml:"chains"` + Relayers []*Relayer `name:"relayers" json:"relayers" yaml:"relayers"` + Explorer *Feature `name:"explorer" json:"explorer" yaml:"explorer"` + Registry *Feature `name:"registry" json:"registry" yaml:"registry"` + Monitoring *Feature `name:"monitoring" json:"monitoring" yaml:"monitoring"` } // HasChainId returns true if chain id found in list of chains From 8310273c460908d1c1292a644901bf11c4d34f01 Mon Sep 17 00:00:00 2001 From: Anmol Date: Thu, 19 Oct 2023 14:06:29 +0530 Subject: [PATCH 12/15] Update tests/e2e/Makefile --- tests/e2e/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/e2e/Makefile b/tests/e2e/Makefile index a9d772a6c..0d2804b6f 100644 --- a/tests/e2e/Makefile +++ b/tests/e2e/Makefile @@ -1,5 +1,5 @@ HELM_NAME ?= starship-e2e-tests -HELM_FILE ?= configs/two-chain-monitoring.yaml +HELM_FILE ?= configs/two-chain.yaml HELM_REPO ?= starship HELM_CHART ?= devnet From 4bc83695c5e4bf8fd9640c4c7e9726436091b2b1 Mon Sep 17 00:00:00 2001 From: Anmol Date: Thu, 19 Oct 2023 17:40:43 +0530 Subject: [PATCH 13/15] Update charts/devnet/templates/monitoring/prometheus.yaml --- .../templates/monitoring/prometheus.yaml | 20 ------------------- 1 file changed, 20 deletions(-) diff --git a/charts/devnet/templates/monitoring/prometheus.yaml b/charts/devnet/templates/monitoring/prometheus.yaml index 06c770291..3d3a971f7 100644 --- a/charts/devnet/templates/monitoring/prometheus.yaml +++ b/charts/devnet/templates/monitoring/prometheus.yaml @@ -116,26 +116,6 @@ data: static_configs: - targets: ['kube-state-metrics.kube-system.svc.cluster.local:8080'] -{{/* - job_name: 'kubernetes-cadvisor'*/}} - -{{/* scheme: https*/}} - -{{/* tls_config:*/}} -{{/* ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt*/}} -{{/* bearer_token_file: 
/var/run/secrets/kubernetes.io/serviceaccount/token*/}} - -{{/* kubernetes_sd_configs:*/}} -{{/* - role: node*/}} - -{{/* relabel_configs:*/}} -{{/* - action: labelmap*/}} -{{/* regex: __meta_kubernetes_node_label_(.+)*/}} -{{/* - target_label: __address__*/}} -{{/* replacement: kubernetes.default.svc:443*/}} -{{/* - source_labels: [__meta_kubernetes_node_name]*/}} -{{/* regex: (.+)*/}} -{{/* target_label: __metrics_path__*/}} -{{/* replacement: /api/v1/nodes/${1}/proxy/metrics/cadvisor*/}} - job_name: kubernetes-nodes-cadvisor scrape_interval: 10s scrape_timeout: 10s From eaa12208dc45d74aebfbce657f8ebf072b79129c Mon Sep 17 00:00:00 2001 From: Anmol Date: Thu, 19 Oct 2023 17:42:05 +0530 Subject: [PATCH 14/15] Update charts/devnet/templates/monitoring/prometheus.yaml --- charts/devnet/templates/monitoring/prometheus.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/charts/devnet/templates/monitoring/prometheus.yaml b/charts/devnet/templates/monitoring/prometheus.yaml index 3d3a971f7..112854279 100644 --- a/charts/devnet/templates/monitoring/prometheus.yaml +++ b/charts/devnet/templates/monitoring/prometheus.yaml @@ -178,6 +178,7 @@ data: - source_labels: [__meta_kubernetes_service_name] action: replace target_label: kubernetes_name + - job_name: 'prometheus' # metrics_path defaults to '/metrics' # scheme defaults to 'http'. From b5e0d3fccf19956f06466c187d9fe96fa6d27b16 Mon Sep 17 00:00:00 2001 From: Anmol Date: Thu, 19 Oct 2023 17:42:20 +0530 Subject: [PATCH 15/15] Update charts/devnet/templates/monitoring/prometheus.yaml --- charts/devnet/templates/monitoring/prometheus.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/charts/devnet/templates/monitoring/prometheus.yaml b/charts/devnet/templates/monitoring/prometheus.yaml index 112854279..3369140a4 100644 --- a/charts/devnet/templates/monitoring/prometheus.yaml +++ b/charts/devnet/templates/monitoring/prometheus.yaml @@ -184,6 +184,7 @@ data: # scheme defaults to 'http'. 
static_configs: - targets: ['localhost:9090'] + {{- range $i, $chain := .Values.chains }} {{ $fullchain := include "devnet.fullchain" (dict "name" $chain.name "file" $defaultFile "context" $) | fromJson }} {{- if $fullchain.metrics }}