From ffe9806f0bb473916a58e66945a266a286030935 Mon Sep 17 00:00:00 2001 From: malcgds Date: Mon, 15 Jan 2024 09:35:25 +0000 Subject: [PATCH 1/3] [#186779486] Added service operation levels dashboard --- .../service-operation-levels.json | 406 ++++++++++++++++++ 1 file changed, 406 insertions(+) create mode 100644 manifests/prometheus/dashboards.d/service-operation-levels.json diff --git a/manifests/prometheus/dashboards.d/service-operation-levels.json b/manifests/prometheus/dashboards.d/service-operation-levels.json new file mode 100644 index 0000000000..003b6e6f21 --- /dev/null +++ b/manifests/prometheus/dashboards.d/service-operation-levels.json @@ -0,0 +1,406 @@ +{ + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": { + "type": "grafana", + "uid": "-- Grafana --" + }, + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "type": "dashboard" + } + ] + }, + "editable": true, + "fiscalYearStartMonth": 0, + "graphTooltip": 0, + "id": 76, + "links": [], + "liveNow": false, + "panels": [ + { + "datasource": { + "type": "prometheus", + "uid": "P1809F7CD0C75ACF3" + }, + "description": "Shows the average memory usage, as a percentage, across all Diego cells", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } 
+ ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 0 + }, + "id": 1, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "P1809F7CD0C75ACF3" + }, + "editorMode": "code", + "expr": "avg(bosh_job_mem_percent{bosh_job_name=\"diego-cell\"})", + "legendFormat": "__auto", + "range": true, + "refId": "A" + } + ], + "title": "Cell Utilisation Rate", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "P1809F7CD0C75ACF3" + }, + "description": "Number of Application Instances", + "fieldConfig": { + "defaults": { + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 0 + }, + "id": 12, + "links": [], + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "mean" + ], + "fields": "", + "values": false + }, + "textMode": "auto" + }, + "pluginVersion": "9.5.13", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "P1809F7CD0C75ACF3" + }, + "editorMode": "code", + "expr": "min(cf_application_instances{environment=~\"dev05\",deployment=~\"dev05[-0-9a-f]*\"})", + "format": "time_series", + "interval": "", + "intervalFactor": 2, + "legendFormat": "Desired", + "range": true, + "refId": "A", + "step": 10 + }, + { + "datasource": { + "type": "prometheus", + "uid": "P1809F7CD0C75ACF3" + }, + "editorMode": "code", + "expr": "min(cf_application_instances_running{environment=~\"dev05\",deployment=~\"dev05[-0-9a-f]*\"})", + "format": "time_series", + "hide": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": 
"Running", + "range": true, + "refId": "B", + "step": 10 + } + ], + "title": "Application Instances", + "transparent": true, + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "P1809F7CD0C75ACF3" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 8 + }, + "id": 13, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "P1809F7CD0C75ACF3" + }, + "editorMode": "code", + "expr": "avg(rate(firehose_value_metric_cc_http_status_5_xx[30d]))", + "legendFormat": "__auto", + "range": true, + "refId": "A" + } + ], + "title": "Average API 5xx rate over 30 days", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "P1809F7CD0C75ACF3" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": 
"none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 8 + }, + "id": 14, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "P1809F7CD0C75ACF3" + }, + "editorMode": "code", + "expr": "avg(rate(firehose_counter_event_loggregator_doppler_dropped_total[1h]))", + "legendFormat": "__auto", + "range": true, + "refId": "A" + } + ], + "title": "Average dropped log message rate", + "type": "timeseries" + } + ], + "refresh": "", + "schemaVersion": 38, + "style": "dark", + "tags": [], + "templating": { + "list": [] + }, + "time": { + "from": "now-6h", + "to": "now" + }, + "timepicker": {}, + "timezone": "", + "title": "Service Operation Levels", + "uid": "ae1f02b8-1593-4f9b-9ee6-4be8cef86596", + "version": 6, + "weekStart": "" + } + \ No newline at end of file From 92bbc13226f755370c181da877b63cabfbe27e69 Mon Sep 17 00:00:00 2001 From: malcgds Date: Thu, 18 Jan 2024 11:40:08 +0000 Subject: [PATCH 2/3] [#186779486] Corrected dashboard json spec --- .../dashboards.d/service-operation-levels.json | 2 +- .../operations.d/305-scrape-pingdom-metrics.yml | 10 ++++++++++ 2 files changed, 11 insertions(+), 1 deletion(-) create mode 100644 manifests/prometheus/operations.d/305-scrape-pingdom-metrics.yml diff --git 
a/manifests/prometheus/dashboards.d/service-operation-levels.json b/manifests/prometheus/dashboards.d/service-operation-levels.json index 003b6e6f21..bc41a9974e 100644 --- a/manifests/prometheus/dashboards.d/service-operation-levels.json +++ b/manifests/prometheus/dashboards.d/service-operation-levels.json @@ -18,7 +18,7 @@ "editable": true, "fiscalYearStartMonth": 0, "graphTooltip": 0, - "id": 76, + "id": null, "links": [], "liveNow": false, "panels": [ diff --git a/manifests/prometheus/operations.d/305-scrape-pingdom-metrics.yml b/manifests/prometheus/operations.d/305-scrape-pingdom-metrics.yml new file mode 100644 index 0000000000..0cff9be87d --- /dev/null +++ b/manifests/prometheus/operations.d/305-scrape-pingdom-metrics.yml @@ -0,0 +1,10 @@ +--- +- type: replace + path: /instance_groups/name=prometheus2/jobs/name=prometheus2/properties/prometheus/scrape_configs/- + value: + job_name: pingdom-exporter + scrape_interval: 5m + scheme: http + static_configs: + - targets: + - pingdom-exporter.((app_domain)) \ No newline at end of file From 140cb965e151b3690e166dd135cacce9d34cddd2 Mon Sep 17 00:00:00 2001 From: malcgds Date: Thu, 18 Jan 2024 16:48:12 +0000 Subject: [PATCH 3/3] [#186779486] Removed hard-coded env --- .../prometheus/dashboards.d/service-operation-levels.json | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/manifests/prometheus/dashboards.d/service-operation-levels.json b/manifests/prometheus/dashboards.d/service-operation-levels.json index bc41a9974e..9ec297fd53 100644 --- a/manifests/prometheus/dashboards.d/service-operation-levels.json +++ b/manifests/prometheus/dashboards.d/service-operation-levels.json @@ -170,7 +170,7 @@ "uid": "P1809F7CD0C75ACF3" }, "editorMode": "code", - "expr": "min(cf_application_instances{environment=~\"dev05\",deployment=~\"dev05[-0-9a-f]*\"})", + "expr": "min(cf_application_instances{environment=~\"$environment\",deployment=~\"$bosh_deployment\"})", "format": "time_series", "interval": "", 
"intervalFactor": 2, @@ -185,7 +185,7 @@ "uid": "P1809F7CD0C75ACF3" }, "editorMode": "code", - "expr": "min(cf_application_instances_running{environment=~\"dev05\",deployment=~\"dev05[-0-9a-f]*\"})", + "expr": "min(cf_application_instances_running{environment=~\"$environment\",deployment=~\"$bosh_deployment\"})", "format": "time_series", "hide": false, "interval": "",