diff --git a/beyla-mixin/Makefile b/beyla-mixin/Makefile new file mode 100644 index 000000000..b4fdca560 --- /dev/null +++ b/beyla-mixin/Makefile @@ -0,0 +1 @@ +include ../Makefile_mixin diff --git a/beyla-mixin/README.md b/beyla-mixin/README.md new file mode 100644 index 000000000..ac3cbed7f --- /dev/null +++ b/beyla-mixin/README.md @@ -0,0 +1,3 @@ +# Beyla monitoring mixins + +This mixin includes Grafana Dashboards and Alerts for Beyla. \ No newline at end of file diff --git a/beyla-mixin/alerts/alerts.libsonnet b/beyla-mixin/alerts/alerts.libsonnet new file mode 100644 index 000000000..f366523ea --- /dev/null +++ b/beyla-mixin/alerts/alerts.libsonnet @@ -0,0 +1,102 @@ +{ + prometheusAlerts+: { + groups+: [ + { + name: 'beyla_internal_alerts', + rules: [ + { + alert: 'BeylaOTELMetricExportHighErrorRate', + expr: ||| + 100 * sum(rate(beyla_otel_metric_export_errors_total[1m])) by (cluster) + / + sum(rate(beyla_otel_metric_exports_total[1m])) by (cluster) + > 2 + |||, + 'for': '15m', + labels: { + severity: 'warning', + }, + annotations: { + summary: 'Beyla has a high error rate for OTEL metric exports.', + description: 'Beyla in {{ $labels.cluster }} is experiencing {{ printf "%.2f" $value }}% errors for OTEL metric exports.', + runbook_url: 'https://github.com/grafana/beyla/tree/main/ops/runbook.md#BeylaOTELMetricExportHighErrorRate', + }, + }, + { + alert: 'BeylaOTELMetricExportHighErrorRate', + expr: ||| + 100 * sum(rate(beyla_otel_metric_export_errors_total[1m])) by (cluster) + / + sum(rate(beyla_otel_metric_exports_total[1m])) by (cluster) + > 5 + |||, + 'for': '15m', + labels: { + severity: 'critical', + }, + annotations: { + summary: 'Beyla has a high error rate for OTEL metric exports.', + description: 'Beyla in {{ $labels.cluster }} is experiencing {{ printf "%.2f" $value }}% errors for OTEL metric exports.', + runbook_url: 'https://github.com/grafana/beyla/tree/main/ops/runbook.md#BeylaOTELMetricExportHighErrorRate', + }, + }, + { + alert: 
'BeylaOTELTraceExportHighErrorRate', + expr: ||| + 100 * sum(rate(beyla_otel_trace_export_errors_total[1m])) by (cluster) + / + sum(rate(beyla_otel_trace_exports_total[1m])) by (cluster) + > 2 + |||, + 'for': '15m', + labels: { + severity: 'warning', + }, + annotations: { + summary: 'Beyla has a high error rate for OTEL trace exports.', + description: 'Beyla in {{ $labels.cluster }} is experiencing {{ printf "%.2f" $value }}% errors for OTEL trace exports.', + runbook_url: 'https://github.com/grafana/beyla/tree/main/ops/runbook.md#BeylaOTELTraceExportHighErrorRate', + }, + }, + { + alert: 'BeylaOTELTraceExportHighErrorRate', + expr: ||| + 100 * sum(rate(beyla_otel_trace_export_errors_total[1m])) by (cluster) + / + sum(rate(beyla_otel_trace_exports_total[1m])) by (cluster) + > 5 + |||, + 'for': '15m', + labels: { + severity: 'critical', + }, + annotations: { + summary: 'Beyla has a high error rate for OTEL trace exports.', + description: 'Beyla in {{ $labels.cluster }} is experiencing {{ printf "%.2f" $value }}% errors for OTEL trace exports.', + runbook_url: 'https://github.com/grafana/beyla/tree/main/ops/runbook.md#BeylaOTELTraceExportHighErrorRate', + }, + }, + { + alert: 'BeylaInstrumentedProcessesNoTelemetry', + expr: ||| + sum by(cluster) (beyla_instrumented_processes{process_name!="beyla"}) > 1 and + (absent(sum by(cluster) (rate(beyla_otel_metric_exports_total[5m]))) or + absent(sum by(cluster) (rate(beyla_otel_trace_exports_total[5m]))) or + absent(sum by(cluster) (rate(beyla_ebpf_tracer_flushes_sum[5m]))) or + absent(sum by(cluster) (rate(beyla_prometheus_http_requests_total[5m])))) + |||, + 'for': '10m', + labels: { + severity: 'warning', + }, + annotations: { + summary: 'Beyla has instrumented processes without metrics or traces.', + description: 'Beyla in {{ $labels.cluster }} has more than 1 instrumented processes, but no metrics or traces have been exported in the last 10 minutes.', + runbook_url: 
'https://github.com/grafana/beyla/tree/main/ops/runbook.md#BeylaInstrumentedProcessesNoTelemetry', + }, + }, + ], + }, + ], + }, +} diff --git a/beyla-mixin/dashboards/application.json b/beyla-mixin/dashboards/application.json new file mode 100644 index 000000000..8031a76a6 --- /dev/null +++ b/beyla-mixin/dashboards/application.json @@ -0,0 +1,1203 @@ +{ + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": { + "type": "grafana", + "uid": "-- Grafana --" + }, + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "type": "dashboard" + } + ] + }, + "description": "HTTP and gRPC RED metrics visualization for Grafana Beyla", + "editable": false, + "fiscalYearStartMonth": 0, + "graphTooltip": 0, + "id": 38, + "links": [], + "liveNow": false, + "panels": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "description": "the slowest HTTP routes sorted by response time percentile", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "custom": { + "align": "auto", + "cellOptions": { + "type": "json-view" + }, + "filterable": true, + "inspect": false + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "s" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "Time" + }, + "properties": [ + { + "id": "custom.hidden", + "value": true + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Value" + }, + "properties": [ + { + "id": "custom.hidden", + "value": true + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Duration (ms)" + }, + "properties": [ + { + "id": "decimals", + "value": 0 + } + ] + } + ] + }, + "gridPos": { + "h": 5, + "w": 13, + "x": 0, + "y": 0 + }, + "id": 35, + "options": { + "cellHeight": "sm", + "footer": { + "countRows": false, + "fields": "", + "reducer": [ + "sum" + ], 
+ "show": false + }, + "frameIndex": 2, + "showHeader": true, + "sortBy": [ + { + "desc": true, + "displayName": "Duration (ms)" + } + ] + }, + "pluginVersion": "10.1.0-57417pre", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "editorMode": "code", + "expr": "sort_desc(topk by(http_route, service_name) (5, max by (http_route, service_name) (histogram_quantile(0.95, (sum by(http_route, service_name, le) (rate({__name__=~\"http_server_request_duration_seconds_bucket|http_server_request_duration_bucket\",instance=~\"$instance\",job=~\"$job\"}[$__rate_interval])))))))", + "format": "table", + "hide": false, + "instant": true, + "legendFormat": "{{service_name}} - {{http_route}}", + "range": false, + "refId": "A" + } + ], + "title": "Slowest HTTP routes (P95)", + "transformations": [ + { + "id": "calculateField", + "options": { + "alias": "Duration (ms)", + "binary": { + "left": "Value", + "operator": "*", + "reducer": "sum", + "right": "1000" + }, + "mode": "binary", + "reduce": { + "reducer": "sum" + }, + "replaceFields": false + } + }, + { + "id": "organize", + "options": { + "excludeByName": {}, + "indexByName": { + "Duration (ms)": 4, + "Time": 1, + "Value": 3, + "http_route": 2, + "service_name": 0 + }, + "renameByName": {} + } + } + ], + "type": "table" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "description": "the slowest remote procedure calls (RPC) in your application sorted by response time percentile", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "custom": { + "align": "auto", + "cellOptions": { + "type": "json-view" + }, + "filterable": true, + "inspect": false + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "s" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "Time" + }, + "properties": 
[ + { + "id": "custom.hidden", + "value": true + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Value" + }, + "properties": [ + { + "id": "custom.hidden", + "value": true + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Duration (ms)" + }, + "properties": [ + { + "id": "decimals", + "value": 0 + } + ] + } + ] + }, + "gridPos": { + "h": 5, + "w": 11, + "x": 13, + "y": 0 + }, + "id": 56, + "options": { + "cellHeight": "sm", + "footer": { + "countRows": false, + "fields": "", + "reducer": [ + "sum" + ], + "show": false + }, + "frameIndex": 2, + "showHeader": true, + "sortBy": [ + { + "desc": true, + "displayName": "Duration (ms)" + } + ] + }, + "pluginVersion": "9.5.2", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "editorMode": "code", + "expr": "sort_desc(topk by(rpc_method, service_name) (5, max by (rpc_method, service_name) (histogram_quantile(0.95, (sum by(rpc_method, service_name, le) (rate({__name__=~\"rpc_server_duration_seconds_bucket|rpc_server_duration_bucket\",instance=~\"$instance\",job=~\"$job\"}[$__rate_interval])))))))", + "format": "table", + "hide": false, + "instant": true, + "legendFormat": "{{service_name}} - {{rpc_method}}", + "range": false, + "refId": "A" + } + ], + "title": "Slowest RPC methods (P95)", + "transformations": [ + { + "id": "calculateField", + "options": { + "alias": "Duration (ms)", + "binary": { + "left": "Value", + "operator": "*", + "reducer": "sum", + "right": "1000" + }, + "mode": "binary", + "reduce": { + "reducer": "sum" + }, + "replaceFields": false + } + } + ], + "type": "table" + }, + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 5 + }, + "id": 72, + "panels": [], + "repeat": "Service", + "repeatDirection": "h", + "title": "Inbound: $Service", + "type": "row" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "description": "average and top percentiles for both HTTP and gRPC 
traffic", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 63, + "gradientMode": "opacity", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 3, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "s" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 8, + "x": 0, + "y": 6 + }, + "id": 1, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "editorMode": "code", + "expr": "histogram_quantile(0.99, sum by(service_name, le) (rate({__name__=~\"http_server_request_duration_seconds_bucket|http_server_request_duration_bucket\",service_name=\"$Service\",instance=~\"$instance\",job=~\"$job\"}[$__rate_interval])))", + "legendFormat": "HTTP p99", + "range": true, + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "editorMode": "code", + "expr": "histogram_quantile(0.95, sum(rate({__name__=~\"http_server_request_duration_seconds_bucket|http_server_request_duration_bucket\",service_name=\"$Service\",instance=~\"$instance\",job=~\"$job\"}[$__rate_interval])) by (service_name, le))", + "hide": false, + "legendFormat": "HTTP p95", + "range": true, + "refId": "B" + }, + { + "datasource": { + "type": 
"prometheus", + "uid": "${datasource}" + }, + "editorMode": "code", + "expr": "sum(rate({__name__=~\"http_server_request_duration_seconds_sum|http_server_request_duration_sum\",service_name=\"$Service\",instance=~\"$instance\",job=~\"$job\"} [$__rate_interval])) / sum(rate({__name__=~\"http_server_request_duration_seconds_count|http_server_request_duration_count\",service_name=\"$Service\",instance=~\"$instance\",job=~\"$job\"} [$__rate_interval]))", + "hide": false, + "legendFormat": "HTTP Avg", + "range": true, + "refId": "C" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "editorMode": "code", + "expr": "histogram_quantile(0.99, sum by(service_name, le) (rate({__name__=~\"rpc_server_duration_seconds_bucket|rpc_server_duration_bucket\",service_name=\"$Service\",instance=~\"$instance\",job=~\"$job\"}[$__rate_interval])))", + "hide": false, + "legendFormat": "RPC p99", + "range": true, + "refId": "D" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "editorMode": "code", + "expr": "histogram_quantile(0.95, sum by(service_name, le) (rate({__name__=~\"rpc_server_duration_seconds_bucket|rpc_server_duration_bucket\",service_name=\"$Service\",instance=~\"$instance\",job=~\"$job\"}[$__rate_interval])))", + "hide": false, + "legendFormat": "RPC p95", + "range": true, + "refId": "E" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "editorMode": "code", + "expr": "sum(rate({__name__=~\"rpc_server_duration_seconds_sum|rpc_server_duration_sum\",service_name=\"$Service\",instance=~\"$instance\",job=~\"$job\"} [$__rate_interval])) / sum(rate({__name__=~\"rpc_server_duration_seconds_count|rpc_server_duration_count\",service_name=\"$Service\",instance=~\"$instance\",job=~\"$job\"} [$__rate_interval]))", + "hide": false, + "legendFormat": "RPC Avg", + "range": true, + "refId": "F" + } + ], + "title": "Duration", + "type": "timeseries" + }, + { + "datasource": { + "type": 
"prometheus", + "uid": "${datasource}" + }, + "description": "number of requests per second, faceted by its HTTP or gRPC return code", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "reqps" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 8, + "x": 8, + "y": 6 + }, + "id": 58, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "editorMode": "code", + "expr": "sum(rate({__name__=~\"http_server_request_duration_seconds_count|http_server_request_duration_count\",service_name=\"$Service\",instance=~\"$instance\",job=~\"$job\"} [$__rate_interval])) by (http_response_status_code)", + "hide": false, + "legendFormat": "HTTP server - {{http_response_status_code}}", + "range": true, + "refId": "B" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "editorMode": "code", + "expr": "sum(rate({__name__=~\"rpc_server_duration_seconds_count|rpc_server_duration_count\",service_name=\"$Service\",instance=~\"$instance\",job=~\"$job\"} [$__rate_interval])) by (service_name, 
rpc_grpc_status_code)", + "hide": false, + "legendFormat": "RPC server (status {{rpc_grpc_status_code}})", + "range": true, + "refId": "D" + } + ], + "title": "Request rate", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "description": "percentage of 4xx|5xx HTTP responses or non-zero gRPC responses over the total of the requests", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "axisSoftMax": 1, + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "percentunit" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 8, + "x": 16, + "y": 6 + }, + "id": 59, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "editorMode": "code", + "expr": "sum by (http_response_status_code) (rate({__name__=~\"http_server_request_duration_seconds_count|http_server_request_duration_count\",service_name=\"${Service}\",http_response_status_code=~\"(4|5).*\",instance=~\"$instance\",job=~\"$job\"}[$__rate_interval])) / ignoring(http_response_status_code) group_left 
sum(rate({__name__=~\"http_server_request_duration_seconds_count|http_server_request_duration_count\",service_name=\"${Service}\",instance=~\"$instance\",job=~\"$job\"}[$__rate_interval]))", + "hide": false, + "legendFormat": "HTTP server - {{http_response_status_code}}", + "range": true, + "refId": "B" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "editorMode": "code", + "expr": "sum by (rpc_grpc_status_code) (rate({__name__=~\"rpc_server_duration_seconds_count|rpc_server_duration_count\",service_name=\"${Service}\",rpc_grpc_status_code!=\"0\",instance=~\"$instance\",job=~\"$job\"}[$__rate_interval])) / ignoring(rpc_grpc_status_code) group_left sum(rate({__name__=~\"rpc_server_duration_seconds_count|rpc_server_duration_count\",service_name=\"${Service}\",instance=~\"$instance\",job=~\"$job\"}[$__rate_interval]))", + "hide": false, + "legendFormat": "RPC server (status {{rpc_grpc_status_code}})", + "range": true, + "refId": "D" + } + ], + "title": "Error rate", + "type": "timeseries" + }, + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 14 + }, + "id": 5, + "panels": [], + "repeat": "Service", + "repeatDirection": "h", + "title": "Outbound: $Service", + "type": "row" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "description": "average and top percentiles for both HTTP and gRPC traffic", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 63, + "gradientMode": "opacity", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 3, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + 
"thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "s" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 8, + "x": 0, + "y": 15 + }, + "id": 6, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "editorMode": "code", + "expr": "histogram_quantile(0.99, sum by(service_name, le) (rate({__name__=~\"http_client_request_duration_seconds_bucket|http_client_request_duration_bucket\",service_name=\"$Service\",instance=~\"$instance\",job=~\"$job\"}[$__rate_interval])))", + "legendFormat": "HTTP p99", + "range": true, + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "editorMode": "code", + "expr": "histogram_quantile(0.95, sum(rate({__name__=~\"http_client_request_duration_seconds_bucket|http_client_request_duration_bucket\",service_name=\"$Service\",instance=~\"$instance\",job=~\"$job\"}[$__rate_interval])) by (service_name, le)) ", + "hide": false, + "legendFormat": "HTTP p95", + "range": true, + "refId": "B" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "editorMode": "code", + "expr": "sum(rate({__name__=~\"http_client_request_duration_seconds_sum|http_client_request_duration_sum\",service_name=\"$Service\",instance=~\"$instance\",job=~\"$job\"} [$__rate_interval])) / sum(rate({__name__=~\"http_client_request_duration_count|http_client_request_duration_seconds_count\",service_name=\"$Service\",instance=~\"$instance\",job=~\"$job\"} [$__rate_interval]))", + "hide": false, + "legendFormat": "HTTP Avg", + "range": true, + "refId": "C" + }, + { + "datasource": { + "type": "prometheus", + "uid": 
"${datasource}" + }, + "editorMode": "code", + "expr": "sum(rate({__name__=~\"rpc_client_duration_seconds_sum|rpc_client_duration_sum\",service_name=\"$Service\",instance=~\"$instance\",job=~\"$job\"} [$__rate_interval])) / sum(rate({__name__=~\"rpc_client_duration_count|rpc_client_duration_seconds_count\",service_name=\"$Service\",instance=~\"$instance\",job=~\"$job\"} [$__rate_interval]))", + "hide": false, + "legendFormat": "RPC Avg", + "range": true, + "refId": "D" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "editorMode": "code", + "expr": "histogram_quantile(0.99, sum by(service_name, le) (rate({__name__=~\"rpc_client_duration_seconds_bucket|rpc_client_duration_bucket\",service_name=\"$Service\",instance=~\"$instance\",job=~\"$job\"}[$__rate_interval])))", + "hide": false, + "legendFormat": "RPC p99", + "range": true, + "refId": "E" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "editorMode": "code", + "expr": "histogram_quantile(0.95, sum(rate({__name__=~\"rpc_client_duration_seconds_bucket|rpc_client_duration_bucket\",service_name=\"$Service\",instance=~\"$instance\",job=~\"$job\"}[$__rate_interval])) by (service_name, le)) ", + "hide": false, + "legendFormat": "RPC p95", + "range": true, + "refId": "F" + } + ], + "title": "Duration", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "description": "number of requests per second, faceted by its HTTP or gRPC return code", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": 
"linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "reqps" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 8, + "x": 8, + "y": 15 + }, + "id": 8, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "editorMode": "code", + "expr": "sum(rate({__name__=~\"http_client_request_duration_count|http_client_request_duration_seconds_count\",service_name=\"$Service\",instance=~\"$instance\",job=~\"$job\"} [$__rate_interval])) by (service_name, http_response_status_code)", + "legendFormat": "HTTP client - {{http_response_status_code}}", + "range": true, + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "editorMode": "code", + "expr": "sum(rate({__name__=~\"rpc_client_duration_count|rpc_client_duration_seconds_count\",service_name=\"$Service\",instance=~\"$instance\",job=~\"$job\"} [$__rate_interval])) by (service_name, rpc_grpc_status_code)", + "hide": false, + "legendFormat": "RPC client (status {{rpc_grpc_status_code}})", + "range": true, + "refId": "C" + } + ], + "title": "Rate", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "description": "percentage of 5xx HTTP responses or non-zero gRPC responses over the total of the requests", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "axisSoftMax": 1, + 
"barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "percentunit" + }, + "overrides": [ + { + "__systemRef": "hideSeriesFrom", + "matcher": { + "id": "byNames", + "options": { + "mode": "exclude", + "names": [ + "HTTP server - 500" + ], + "prefix": "All except:", + "readOnly": true + } + }, + "properties": [ + { + "id": "custom.hideFrom", + "value": { + "legend": false, + "tooltip": false, + "viz": false + } + } + ] + } + ] + }, + "gridPos": { + "h": 8, + "w": 8, + "x": 16, + "y": 15 + }, + "id": 73, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "editorMode": "code", + "expr": "sum by (http_response_status_code) (rate({__name__=~\"http_client_request_duration_count|http_client_request_duration_seconds_count\",service_name=\"$Service\",http_response_status_code=~\"5.*\",instance=~\"$instance\",job=~\"$job\"}[$__rate_interval])) / ignoring(http_response_status_code) group_left sum(rate({__name__=~\"http_client_request_duration_count|http_client_request_duration_seconds_count\",service_name=\"$Service\",instance=~\"$instance\",job=~\"$job\"}[$__rate_interval]))", + "hide": false, + "legendFormat": "HTTP client - {{http_response_status_code}}", + "range": true, + "refId": "B" + }, + { + "datasource": { + 
"type": "prometheus", + "uid": "${datasource}" + }, + "editorMode": "code", + "expr": "sum by (rpc_grpc_status_code) (rate({__name__=~\"rpc_client_duration_count|rpc_client_duration_seconds_count\",service_name=\"$Service\",rpc_grpc_status_code!=\"0\",instance=~\"$instance\",job=~\"$job\"}[$__rate_interval])) / ignoring(rpc_grpc_status_code) group_left sum(rate({__name__=~\"rpc_client_duration_count|rpc_client_duration_seconds_count\",service_name=\"$Service\",instance=~\"$instance\",job=~\"$job\"}[$__rate_interval]))", + "hide": false, + "legendFormat": "RPC client (status {{rpc_grpc_status_code}})", + "range": true, + "refId": "D" + } + ], + "title": "Error rate", + "type": "timeseries" + } + ], + "refresh": "30s", + "schemaVersion": 38, + "tags": [ + "ebpf", + "red", + "http", + "grpc", + "https", + "beyla" + ], + "templating": { + "list": [ + { + "label": "Data source", + "name": "datasource", + "query": "prometheus", + "regex": "", + "type": "datasource" + }, + { + "current": { + "selected": true, + "text": [ + "All" + ], + "value": [ + "$__all" + ] + }, + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "definition": "label_values(service_name)", + "hide": 0, + "includeAll": true, + "label": "Service", + "multi": true, + "name": "Service", + "options": [], + "query": { + "query": "label_values(service_name)", + "refId": "PrometheusVariableQueryEditor-VariableQuery" + }, + "refresh": 2, + "regex": "", + "skipUrlSync": false, + "sort": 5, + "type": "query" + }, + { + "allValue": ".+", + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "includeAll": true, + "label": "Job", + "multi": true, + "name": "job", + "query": "label_values(http_server_request_duration_seconds_bucket, job)", + "refresh": 2, + "sort": 1, + "type": "query" + }, + { + "allValue": ".+", + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "includeAll": false, + "label": "Instance", + "multi": false, + "name": "instance", + 
"query": "label_values(http_server_request_duration_seconds_bucket{job=~\"$job\"}, instance)", + "refresh": 2, + "sort": 1, + "type": "query" + } + ] + }, + "time": { + "from": "now-30m", + "to": "now" + }, + "timepicker": {}, + "timezone": "", + "title": "Beyla RED Metrics", + "uid": "e0701985-a623-4e62-9fae-f5094244d065", + "version": 3, + "weekStart": "" +} diff --git a/beyla-mixin/dashboards/application_process.json b/beyla-mixin/dashboards/application_process.json new file mode 100644 index 000000000..6659002e2 --- /dev/null +++ b/beyla-mixin/dashboards/application_process.json @@ -0,0 +1,884 @@ +{ + "__inputs": [ + { + "name": "DS_GRAFANACLOUD_PROM", + "label": "grafanacloud-prom", + "description": "", + "type": "datasource", + "pluginId": "prometheus", + "pluginName": "Prometheus" + } + ], + "__elements": {}, + "__requires": [ + { + "type": "grafana", + "id": "grafana", + "name": "Grafana", + "version": "11.3.0-75696" + }, + { + "type": "datasource", + "id": "prometheus", + "name": "Prometheus", + "version": "1.0.0" + }, + { + "type": "panel", + "id": "timeseries", + "name": "Time series", + "version": "" + } + ], + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": { + "type": "grafana", + "uid": "-- Grafana --" + }, + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "type": "dashboard" + } + ] + }, + "description": "Information about the processes belonging to the applications instrumented by Beyla", + "editable": false, + "fiscalYearStartMonth": 0, + "graphTooltip": 0, + "id": null, + "links": [], + "panels": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "description": "CPU utilization percentage for each instrumented process", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + 
"barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "percentunit" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 24, + "x": 0, + "y": 0 + }, + "id": 2, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "11.3.0-75696", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "editorMode": "code", + "expr": "sum(rate({__name__=~\"process_cpu_time.*\",process_pid=\"\",job=~\"$job\",instance=~\"$instance\"}[$__rate_interval]) + on(instance) group_left(k8s_pod_name,k8s_namespace_name,process_command,process_pid) (0 * target_info{process_pid!=\"\",job=~\"$job\",instance=~\"$instance\"})) by(k8s_pod_name,k8s_namespace_name,process_command,process_pid)", + "legendFormat": "{{process_command}}/{{process_pid}} (Pod: {{k8s_namespace_name}}/{{k8s_pod_name}})", + "range": true, + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "editorMode": "code", + "expr": "sum(rate({__name__=~\"process_cpu_time.*\",process_pid!=\"\",job=~\"$job\",instance=~\"$instance\"}[$__rate_interval])) by(k8s_pod_name,k8s_namespace_name,process_command,process_pid)", + "hide": false, + "instant": false, + "legendFormat": "{{process_command}}/{{process_pid}} (Pod: 
{{k8s_namespace_name}}/{{k8s_pod_name}})", + "range": true, + "refId": "B" + } + ], + "title": "CPU percent", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "description": "Physical memory usage for each instrumented process", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "bytes" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 8 + }, + "id": 3, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "11.3.0-75696", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "editorMode": "code", + "expr": "sum({__name__=~\"process_memory_usage.*\",process_pid=\"\",job=~\"$job\",instance=~\"$instance\"} + on(instance) group_left(k8s_pod_name,k8s_namespace_name,process_command,process_pid) (0 * target_info{process_pid!=\"\",job=~\"$job\",instance=~\"$instance\"})) by(k8s_pod_name,k8s_namespace_name,process_command,process_pid)", + "legendFormat": "{{process_command}}/{{process_pid}} 
(Pod: {{k8s_namespace_name}}/{{k8s_pod_name}})", + "range": true, + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "editorMode": "code", + "expr": "sum({__name__=~\"process_memory_usage.*\",process_pid!=\"\",job=~\"$job\",instance=~\"$instance\"}) by(k8s_pod_name,k8s_namespace_name,process_command,process_pid)", + "hide": false, + "legendFormat": "{{process_command}}/{{process_pid}} (Pod: {{k8s_namespace_name}}/{{k8s_pod_name}})", + "range": true, + "refId": "B" + } + ], + "title": "Physical memory", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "description": "Virtual memory usage for each instrumented process", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "bytes" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 8 + }, + "id": 4, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "11.3.0-75696", + "targets": [ + { + "datasource": { + "type": "prometheus", + 
"uid": "${datasource}" + }, + "editorMode": "code", + "expr": "sum({__name__=~\"process_memory_virtual.*\",process_pid=\"\",job=~\"$job\",instance=~\"$instance\"} + on(instance) group_left(k8s_pod_name,k8s_namespace_name,process_command,process_pid) (0 * target_info{process_pid!=\"\",job=~\"$job\",instance=~\"$instance\"})) by(k8s_pod_name,k8s_namespace_name,process_command,process_pid)", + "legendFormat": "{{process_command}}/{{process_pid}} (Pod: {{k8s_namespace_name}}/{{k8s_pod_name}})", + "range": true, + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "editorMode": "code", + "expr": "sum({__name__=~\"process_memory_virtual.*\",process_pid!=\"\",job=~\"$job\",instance=~\"$instance\"}) by(k8s_pod_name,k8s_namespace_name,process_command,process_pid)", + "hide": false, + "legendFormat": "{{process_command}}/{{process_pid}} (Pod: {{k8s_namespace_name}}/{{k8s_pod_name}})", + "range": true, + "refId": "B" + } + ], + "title": "Virtual memory", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "description": "Disk read operations rate for each instrumented process", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + 
}, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "binBps" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 16 + }, + "id": 5, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "11.3.0-75696", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "editorMode": "code", + "expr": "sum(rate({__name__=~\"process_disk_io.*\",process_pid=\"\",disk_io_direction=\"read\",job=~\"$job\",instance=~\"$instance\"}[$__rate_interval]) + on(instance) group_left(k8s_pod_name,k8s_namespace_name,process_command,process_pid) (0 * target_info{process_pid!=\"\",job=~\"$job\",instance=~\"$instance\"})) by(k8s_pod_name,k8s_namespace_name,process_command,process_pid)", + "legendFormat": "{{process_command}}/{{process_pid}} (Pod: {{k8s_namespace_name}}/{{k8s_pod_name}})", + "range": true, + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "editorMode": "code", + "expr": "sum(rate({__name__=~\"process_disk_io.*\",process_pid!=\"\",disk_io_direction=\"read\",job=~\"$job\",instance=~\"$instance\"}[$__rate_interval])) by(k8s_pod_name,k8s_namespace_name,process_command,process_pid)", + "hide": false, + "legendFormat": "{{process_command}}/{{process_pid}} (Pod: {{k8s_namespace_name}}/{{k8s_pod_name}})", + "range": true, + "refId": "B" + } + ], + "title": "Disk I/O read", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "description": "Disk write operations rate for each instrumented process", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + 
"drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "binBps" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 16 + }, + "id": 7, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "11.3.0-75696", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "editorMode": "code", + "expr": "sum(rate({__name__=~\"process_disk_io.*\",process_pid=\"\",disk_io_direction=\"write\",job=~\"$job\",instance=~\"$instance\"}[$__rate_interval]) + on(instance) group_left(k8s_pod_name,k8s_namespace_name,process_command,process_pid) (0 * target_info{process_pid!=\"\",job=~\"$job\",instance=~\"$instance\"})) by(k8s_pod_name,k8s_namespace_name,process_command,process_pid)", + "legendFormat": "{{process_command}}/{{process_pid}} (Pod: {{k8s_namespace_name}}/{{k8s_pod_name}})", + "range": true, + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "editorMode": "code", + "expr": "sum(rate({__name__=~\"process_disk_io.*\",process_pid!=\"\",disk_io_direction=\"write\",job=~\"$job\",instance=~\"$instance\"}[$__rate_interval])) by(k8s_pod_name,k8s_namespace_name,process_command,process_pid)", + "hide": false, + "legendFormat": "{{process_command}}/{{process_pid}} (Pod: 
{{k8s_namespace_name}}/{{k8s_pod_name}})", + "range": true, + "refId": "B" + } + ], + "title": "Disk I/O write", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "description": "Network receive throughput for each instrumented process", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "binBps" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 24 + }, + "id": 6, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "11.3.0-75696", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "editorMode": "code", + "expr": "sum(rate({__name__=~\"process_network_io.*\",process_pid=\"\",network_io_direction=\"receive\",job=~\"$job\",instance=~\"$instance\"}[$__rate_interval]) + on(instance) group_left(k8s_pod_name,k8s_namespace_name,process_command,process_pid) (0 * target_info{process_pid!=\"\",job=~\"$job\",instance=~\"$instance\"})) 
by(k8s_pod_name,k8s_namespace_name,process_command,process_pid)", + "legendFormat": "{{process_command}}/{{process_pid}} (Pod: {{k8s_namespace_name}}/{{k8s_pod_name}})", + "range": true, + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "editorMode": "code", + "expr": "sum(rate({__name__=~\"process_network_io.*\",process_pid!=\"\",network_io_direction=\"receive\",job=~\"$job\",instance=~\"$instance\"}[$__rate_interval])) by(k8s_pod_name,k8s_namespace_name,process_command,process_pid)", + "hide": false, + "legendFormat": "{{process_command}}/{{process_pid}} (Pod: {{k8s_namespace_name}}/{{k8s_pod_name}})", + "range": true, + "refId": "B" + } + ], + "title": "Network I/O receive", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "description": "Network transmit throughput for each instrumented process", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "binBps" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 24 + }, + "id": 8, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", 
+ "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "11.3.0-75696", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "editorMode": "code", + "expr": "sum(rate({__name__=~\"process_network_io.*\",process_pid=\"\",network_io_direction=\"transmit\",job=~\"$job\",instance=~\"$instance\"}[$__rate_interval]) + on(instance) group_left(k8s_pod_name,k8s_namespace_name,process_command,process_pid) (0 * target_info{process_pid!=\"\",job=~\"$job\",instance=~\"$instance\"})) by(k8s_pod_name,k8s_namespace_name,process_command,process_pid)", + "legendFormat": "{{process_command}}/{{process_pid}} (Pod: {{k8s_namespace_name}}/{{k8s_pod_name}})", + "range": true, + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "editorMode": "code", + "expr": "sum(rate({__name__=~\"process_network_io.*\",process_pid!=\"\",network_io_direction=\"transmit\",job=~\"$job\",instance=~\"$instance\"}[$__rate_interval])) by(k8s_pod_name,k8s_namespace_name,process_command,process_pid)", + "hide": false, + "legendFormat": "{{process_command}}/{{process_pid}} (Pod: {{k8s_namespace_name}}/{{k8s_pod_name}})", + "range": true, + "refId": "B" + } + ], + "title": "Network I/O transmit", + "type": "timeseries" + } + ], + "schemaVersion": 39, + "tags": [ + "process", + "beyla" + ], + "templating": { + "list": [ + { + "label": "Data source", + "name": "datasource", + "query": "prometheus", + "regex": "", + "type": "datasource" + }, + { + "allValue": ".+", + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "includeAll": true, + "label": "Job", + "multi": true, + "name": "job", + "query": "label_values(process_cpu_time_seconds_total, job)", + "refresh": 2, + "sort": 1, + "type": "query" + }, + { + "allValue": ".+", + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "includeAll": false, + "label": "Instance", + "multi": false, + 
"name": "instance", + "query": "label_values(process_cpu_time_seconds_total{job=~\"$job\"}, instance)", + "refresh": 2, + "sort": 1, + "type": "query" + } + ] + }, + "time": { + "from": "now-15m", + "to": "now" + }, + "timepicker": {}, + "timezone": "browser", + "title": "Beyla process metrics", + "uid": "bdw3b5cqiwlq8b", + "version": 40, + "weekStart": "" +} \ No newline at end of file diff --git a/beyla-mixin/dashboards/beyla_debug.json b/beyla-mixin/dashboards/beyla_debug.json new file mode 100644 index 000000000..6ee09c2ac --- /dev/null +++ b/beyla-mixin/dashboards/beyla_debug.json @@ -0,0 +1,852 @@ +{ + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": { + "type": "grafana", + "uid": "-- Grafana --" + }, + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "type": "dashboard" + } + ] + }, + "editable": false, + "fiscalYearStartMonth": 0, + "graphTooltip": 0, + "id": 72, + "links": [], + "panels": [ + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 0 + }, + "id": 12, + "panels": [], + "title": "Overview", + "type": "row" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "description": "Current version of Beyla running in the cluster", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "" + }, + "properties": [] + } + ] + }, + "gridPos": { + "h": 3, + "w": 3, + "x": 0, + "y": 1 + }, + "id": 2, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "percentChangeColorMode": "standard", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "/^version$/", + "values": false + }, + "showPercentChange": false, 
+ "textMode": "auto", + "wideLayout": true + }, + "pluginVersion": "11.3.0-74868", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "editorMode": "code", + "expr": "beyla_internal_build_info{job=~\"$job\",instance=~\"$instance\"}", + "format": "table", + "instant": true, + "legendFormat": "__auto", + "range": false, + "refId": "A" + } + ], + "title": "Beyla Version", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "description": "Number of Beyla pods currently running in the cluster", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 3, + "w": 3, + "x": 3, + "y": 1 + }, + "id": 5, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "percentChangeColorMode": "standard", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showPercentChange": false, + "textMode": "auto", + "wideLayout": true + }, + "pluginVersion": "11.3.0-74868", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "editorMode": "code", + "expr": "kube_daemonset_status_number_ready{daemonset=\"beyla\",job=~\"$job\",instance=~\"$instance\"}", + "instant": false, + "legendFormat": "__auto", + "range": true, + "refId": "A" + } + ], + "title": "DaemonSet: Running pods", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "description": "Total number of OpenTelemetry metrics exported by Beyla", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + 
{ + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 3, + "w": 3, + "x": 6, + "y": 1 + }, + "id": 3, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "percentChangeColorMode": "standard", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showPercentChange": false, + "textMode": "auto", + "wideLayout": true + }, + "pluginVersion": "11.3.0-74868", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "disableTextWrap": false, + "editorMode": "builder", + "exemplar": false, + "expr": "sum by(cluster) (beyla_otel_metric_exports_total{job=~\"$job\",instance=~\"$instance\"})", + "fullMetaSearch": false, + "includeNullMetadata": true, + "instant": false, + "legendFormat": "__auto", + "range": true, + "refId": "A", + "useBackend": false + } + ], + "title": "OTEL metric exports", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "description": "Total number of OpenTelemetry traces exported by Beyla", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 3, + "w": 3, + "x": 9, + "y": 1 + }, + "id": 4, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "percentChangeColorMode": "standard", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showPercentChange": false, + "textMode": "auto", + "wideLayout": true + }, + "pluginVersion": "11.3.0-74868", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "editorMode": "code", + "exemplar": false, + "expr": "sum 
by(cluster) (beyla_otel_trace_exports_total{job=~\"$job\",instance=~\"$instance\"})", + "instant": false, + "legendFormat": "__auto", + "range": true, + "refId": "A" + } + ], + "title": "OTEL trace exports", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "description": "List of all processes discovered and instrumented by Beyla", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "custom": { + "align": "auto", + "cellOptions": { + "type": "auto" + }, + "inspect": false + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [ + { + "matcher": { + "id": "byRegexp", + "options": "^(?!.*process|Value).*$" + }, + "properties": [ + { + "id": "custom.hidden", + "value": true + } + ] + } + ] + }, + "gridPos": { + "h": 17, + "w": 8, + "x": 0, + "y": 4 + }, + "id": 1, + "options": { + "cellHeight": "sm", + "footer": { + "countRows": false, + "fields": "", + "reducer": [ + "sum" + ], + "show": false + }, + "showHeader": true, + "sortBy": [ + { + "desc": false, + "displayName": "process_name" + } + ] + }, + "pluginVersion": "11.3.0-74868", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "disableTextWrap": false, + "editorMode": "builder", + "exemplar": false, + "expr": "sum by(process_name) (beyla_instrumented_processes{job=~\"$job\",instance=~\"$instance\"})", + "format": "table", + "fullMetaSearch": false, + "includeNullMetadata": true, + "instant": true, + "interval": "", + "legendFormat": "__auto", + "range": false, + "refId": "A", + "useBackend": false + } + ], + "title": "Discovered Processes", + "type": "table" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "description": "Memory usage of Beyla pods over time", + "fieldConfig": { + "defaults": { + "color": { + "mode": 
"palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "decbytes" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 16, + "x": 8, + "y": 4 + }, + "id": 8, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "11.3.0-74868", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "disableTextWrap": false, + "editorMode": "builder", + "exemplar": false, + "expr": "process_memory_usage_bytes{job=\"default/beyla\",job=~\"$job\",instance=~\"$instance\"}", + "format": "time_series", + "fullMetaSearch": false, + "hide": false, + "includeNullMetadata": true, + "instant": false, + "interval": "", + "legendFormat": "{{k8s_pod_name}}", + "range": true, + "refId": "A", + "useBackend": false + } + ], + "title": "Memory Usage", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "description": "CPU utilization of Beyla processes over time", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": 
false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "percentunit" + }, + "overrides": [] + }, + "gridPos": { + "h": 9, + "w": 16, + "x": 8, + "y": 12 + }, + "id": 9, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "11.3.0-74868", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "disableTextWrap": false, + "editorMode": "builder", + "exemplar": false, + "expr": "sum by(process_command) (process_cpu_utilization_ratio{job=\"default/beyla\",job=~\"$job\",instance=~\"$instance\"})", + "format": "time_series", + "fullMetaSearch": false, + "hide": false, + "includeNullMetadata": true, + "instant": false, + "interval": "", + "legendFormat": "{{process_command}}", + "range": true, + "refId": "A", + "useBackend": false + } + ], + "title": "CPU Usage", + "type": "timeseries" + }, + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 21 + }, + "id": 11, + "panels": [], + "title": "Logs", + "type": "row" + }, + { + "datasource": { + "type": "loki", + "uid": "grafanacloud-logs" + }, + "fieldConfig": { + "defaults": {}, + "overrides": [] + }, + "gridPos": { + "h": 9, + "w": 24, + "x": 0, + 
"y": 22 + }, + "id": 6, + "options": { + "dedupStrategy": "none", + "enableLogDetails": true, + "prettifyLogMessage": false, + "showCommonLabels": false, + "showLabels": false, + "showTime": false, + "sortOrder": "Descending", + "wrapLogMessage": false + }, + "pluginVersion": "11.3.0-74868", + "targets": [ + { + "datasource": { + "type": "loki", + "uid": "grafanacloud-logs" + }, + "editorMode": "code", + "expr": "{job=\"default/beyla\"} | logfmt | component!=`BPFLogger`", + "queryType": "range", + "refId": "A" + } + ], + "title": "Application Logs", + "type": "logs" + }, + { + "datasource": { + "type": "loki", + "uid": "grafanacloud-logs" + }, + "fieldConfig": { + "defaults": {}, + "overrides": [] + }, + "gridPos": { + "h": 9, + "w": 24, + "x": 0, + "y": 31 + }, + "id": 10, + "options": { + "dedupStrategy": "none", + "enableLogDetails": true, + "prettifyLogMessage": false, + "showCommonLabels": false, + "showLabels": false, + "showTime": false, + "sortOrder": "Descending", + "wrapLogMessage": false + }, + "pluginVersion": "11.2.0-73179", + "targets": [ + { + "datasource": { + "type": "loki", + "uid": "grafanacloud-logs" + }, + "editorMode": "code", + "expr": "{job=\"default/beyla\"} | logfmt | component=`BPFLogger`", + "queryType": "range", + "refId": "A" + } + ], + "title": "BPF Logs", + "type": "logs" + }, + { + "datasource": { + "type": "loki", + "uid": "grafanacloud-logs" + }, + "fieldConfig": { + "defaults": {}, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 24, + "x": 0, + "y": 40 + }, + "id": 7, + "options": { + "dedupStrategy": "none", + "enableLogDetails": true, + "prettifyLogMessage": false, + "showCommonLabels": false, + "showLabels": false, + "showTime": false, + "sortOrder": "Descending", + "wrapLogMessage": false + }, + "pluginVersion": "11.2.0-73179", + "targets": [ + { + "datasource": { + "type": "loki", + "uid": "grafanacloud-logs" + }, + "editorMode": "code", + "expr": "{job=\"default/beyla\"} |= `ERROR`", + "queryType": "range", + 
"refId": "A" + } + ], + "title": "Application Errors", + "type": "logs" + } + ], + "preload": false, + "refresh": "10s", + "schemaVersion": 39, + "tags": [ + "beyla", + "ebpf" + ], + "templating": { + "list": [ + { + "label": "Data source", + "name": "datasource", + "query": "prometheus", + "regex": "", + "type": "datasource" + }, + { + "allValue": ".+", + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "includeAll": true, + "label": "Job", + "multi": true, + "name": "job", + "query": "label_values(beyla_internal_build_info, job)", + "refresh": 2, + "sort": 1, + "type": "query" + }, + { + "allValue": ".+", + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "includeAll": false, + "label": "Instance", + "multi": false, + "name": "instance", + "query": "label_values(beyla_internal_build_info{job=~\"$job\"}, instance)", + "refresh": 2, + "sort": 1, + "type": "query" + }, + { + "baseFilters": [], + "datasource": { + "type": "loki", + "uid": "grafanacloud-logs" + }, + "filters": [], + "name": "Filters", + "type": "adhoc" + } + ] + }, + "time": { + "from": "now-6h", + "to": "now" + }, + "timepicker": {}, + "timezone": "browser", + "title": "Beyla", + "uid": "bdp5slbe9ivb4b", + "version": 19, + "weekStart": "" +} \ No newline at end of file diff --git a/beyla-mixin/dashboards/dashboards.libsonnet b/beyla-mixin/dashboards/dashboards.libsonnet new file mode 100644 index 000000000..efb3748d9 --- /dev/null +++ b/beyla-mixin/dashboards/dashboards.libsonnet @@ -0,0 +1,14 @@ +{ + grafanaDashboardFolder+: 'beyla', + grafanaDashboards+: { + 'beyla_debug.json': (import './beyla_debug.json') { + uid: std.md5('beyla_debug.json'), + }, + 'application.json': (import './application.json') { + uid: std.md5('application.json'), + }, + 'application_process.json': (import './application_process.json') { + uid: std.md5('application_process.json'), + }, + }, +} diff --git a/beyla-mixin/mixin.libsonnet b/beyla-mixin/mixin.libsonnet new file mode 
100644 index 000000000..ae506a4d2 --- /dev/null +++ b/beyla-mixin/mixin.libsonnet @@ -0,0 +1,8 @@ +{ + grafanaDashboards+:: { + 'beyla_debug.json': (import 'dashboards/beyla_debug.json'), + 'application.json': (import 'dashboards/application.json'), + 'application_process.json': (import 'dashboards/application_process.json'), + }, + prometheusAlerts+:: (import 'alerts/alerts.libsonnet').prometheusAlerts, +} diff --git a/beyla-mixin/tmp b/beyla-mixin/tmp new file mode 100644 index 000000000..3e14af75e --- /dev/null +++ b/beyla-mixin/tmp @@ -0,0 +1,4471 @@ +parse error in 'mixin.libsonnet': found invalid object (at .): errors parsing resource: kind missing, metadata missing, spec missing + +grafana: + dashboards: + - apiVersion: grizzly.grafana.com/v1alpha1 + kind: Dashboard + metadata: + folder: beyla + name: 422313feb73f77366e56faa7870b77ec + spec: + annotations: + list: + - builtIn: 1 + datasource: + type: grafana + uid: -- Grafana -- + enable: true + hide: true + iconColor: rgba(0, 211, 255, 1) + name: Annotations & Alerts + type: dashboard + description: HTTP and gRPC RED metrics visualization for Grafana Beyla + editable: false + fiscalYearStartMonth: 0 + graphTooltip: 0 + id: 38 + links: [] + liveNow: false + panels: + - datasource: + type: prometheus + uid: ${datasource} + description: the slowest HTTP routes sorted by response time percentile + fieldConfig: + defaults: + color: + mode: thresholds + custom: + align: auto + cellOptions: + type: json-view + filterable: true + inspect: false + mappings: [] + thresholds: + mode: absolute + steps: + - color: green + value: null + - color: red + value: 80 + unit: s + overrides: + - matcher: + id: byName + options: Time + properties: + - id: custom.hidden + value: true + - matcher: + id: byName + options: Value + properties: + - id: custom.hidden + value: true + - matcher: + id: byName + options: Duration (ms) + properties: + - id: decimals + value: 0 + gridPos: + h: 5 + w: 13 + x: 0 + "y": 0 + id: 35 + options: + 
cellHeight: sm + footer: + countRows: false + fields: "" + reducer: + - sum + show: false + frameIndex: 2 + showHeader: true + sortBy: + - desc: true + displayName: Duration (ms) + pluginVersion: 10.1.0-57417pre + targets: + - datasource: + type: prometheus + uid: ${datasource} + editorMode: code + expr: sort_desc(topk by(http_route, service_name) (5, max by (http_route, service_name) (histogram_quantile(0.95, (sum by(http_route, service_name, le) (rate({__name__=~"http_server_request_duration_seconds_bucket|http_server_request_duration_bucket",instance=~"$instance",job=~"$job"}[$__rate_interval]))))))) + format: table + hide: false + instant: true + legendFormat: '{{service_name}} - {{http_route}}' + range: false + refId: A + title: Slowest HTTP routes (P95) + transformations: + - id: calculateField + options: + alias: Duration (ms) + binary: + left: Value + operator: '*' + reducer: sum + right: "1000" + mode: binary + reduce: + reducer: sum + replaceFields: false + - id: organize + options: + excludeByName: {} + indexByName: + Duration (ms): 4 + Time: 1 + Value: 3 + http_route: 2 + service_name: 0 + renameByName: {} + type: table + - datasource: + type: prometheus + uid: ${datasource} + description: the slowest remote procedure calls (RPC) in your application sorted by response time percentile + fieldConfig: + defaults: + color: + mode: thresholds + custom: + align: auto + cellOptions: + type: json-view + filterable: true + inspect: false + mappings: [] + thresholds: + mode: absolute + steps: + - color: green + value: null + - color: red + value: 80 + unit: s + overrides: + - matcher: + id: byName + options: Time + properties: + - id: custom.hidden + value: true + - matcher: + id: byName + options: Value + properties: + - id: custom.hidden + value: true + - matcher: + id: byName + options: Duration (ms) + properties: + - id: decimals + value: 0 + gridPos: + h: 5 + w: 11 + x: 13 + "y": 0 + id: 56 + options: + cellHeight: sm + footer: + countRows: false + fields: 
"" + reducer: + - sum + show: false + frameIndex: 2 + showHeader: true + sortBy: + - desc: true + displayName: Duration (ms) + pluginVersion: 9.5.2 + targets: + - datasource: + type: prometheus + uid: ${datasource} + editorMode: code + expr: sort_desc(topk by(rpc_method, service_name) (5, max by (rpc_method, service_name) (histogram_quantile(0.95, (sum by(rpc_method, service_name, le) (rate({__name__=~"rpc_server_duration_seconds_bucket|rpc_server_duration_bucket",instance=~"$instance",job=~"$job"}[$__rate_interval]))))))) + format: table + hide: false + instant: true + legendFormat: '{{service_name}} - {{http_route}}' + range: false + refId: A + title: Slowest RPC methods (P95) + transformations: + - id: calculateField + options: + alias: Duration (ms) + binary: + left: Value + operator: '*' + reducer: sum + right: "1000" + mode: binary + reduce: + reducer: sum + replaceFields: false + type: table + - collapsed: false + gridPos: + h: 1 + w: 24 + x: 0 + "y": 5 + id: 72 + panels: [] + repeat: Service + repeatDirection: h + title: 'Inbound: $Service' + type: row + - datasource: + type: prometheus + uid: ${datasource} + description: average and top percentiles for both HTTP and gRPC traffic + fieldConfig: + defaults: + color: + mode: palette-classic + custom: + axisCenteredZero: false + axisColorMode: text + axisLabel: "" + axisPlacement: auto + barAlignment: 0 + drawStyle: line + fillOpacity: 63 + gradientMode: opacity + hideFrom: + legend: false + tooltip: false + viz: false + lineInterpolation: linear + lineWidth: 3 + pointSize: 5 + scaleDistribution: + type: linear + showPoints: auto + spanNulls: false + stacking: + group: A + mode: none + thresholdsStyle: + mode: "off" + mappings: [] + thresholds: + mode: absolute + steps: + - color: green + value: null + - color: red + value: 80 + unit: s + overrides: [] + gridPos: + h: 8 + w: 8 + x: 0 + "y": 6 + id: 1 + options: + legend: + calcs: [] + displayMode: list + placement: bottom + showLegend: true + tooltip: + mode: 
single + sort: none + targets: + - datasource: + type: prometheus + uid: ${datasource} + editorMode: code + expr: histogram_quantile(0.99, sum by(service_name, le) (rate({__name__=~"http_server_request_duration_seconds_bucket|http_server_request_duration_bucket",service_name="$Service",instance=~"$instance",job=~"$job"}[$__rate_interval]))) + legendFormat: HTTP p99 + range: true + refId: A + - datasource: + type: prometheus + uid: ${datasource} + editorMode: code + expr: histogram_quantile(0.95, sum(rate({__name__=~"http_server_request_duration_seconds_bucket|http_server_request_duration_bucket",service_name="$Service",instance=~"$instance",job=~"$job"}[$__rate_interval])) by (service_name, le)) + hide: false + legendFormat: HTTP p95 + range: true + refId: B + - datasource: + type: prometheus + uid: ${datasource} + editorMode: code + expr: sum(rate({__name__=~"http_server_request_duration_seconds_sum|http_server_request_duration_sum",service_name="$Service",instance=~"$instance",job=~"$job"} [$__rate_interval])) / sum(rate({__name__=~"http_server_request_duration_seconds_count|http_server_request_duration_count",service_name="$Service",instance=~"$instance",job=~"$job"} [$__rate_interval])) + hide: false + legendFormat: HTTP Avg + range: true + refId: C + - datasource: + type: prometheus + uid: ${datasource} + editorMode: code + expr: histogram_quantile(0.99, sum by(service_name, le) (rate({__name__=~"rpc_server_duration_seconds_bucket|rpc_server_duration_bucket",service_name="$Service",instance=~"$instance",job=~"$job"}[$__rate_interval]))) + hide: false + legendFormat: RPC p99 + range: true + refId: D + - datasource: + type: prometheus + uid: ${datasource} + editorMode: code + expr: histogram_quantile(0.95, sum by(service_name, le) (rate({__name__=~"rpc_server_duration_seconds_bucket|rpc_server_duration_bucket",service_name="$Service",instance=~"$instance",job=~"$job"}[$__rate_interval]))) + hide: false + legendFormat: RPC p95 + range: true + refId: E + - 
datasource: + type: prometheus + uid: ${datasource} + editorMode: code + expr: sum(rate({__name__=~"rpc_server_duration_seconds_sum|rpc_server_duration_sum",service_name="$Service",instance=~"$instance",job=~"$job"} [$__rate_interval])) / sum(rate({__name__=~"rpc_server_duration_seconds_count|rpc_server_duration_count",service_name="$Service",instance=~"$instance",job=~"$job"} [$__rate_interval])) + hide: false + legendFormat: RPC Avg + range: true + refId: F + title: Duration + type: timeseries + - datasource: + type: prometheus + uid: ${datasource} + description: number of requests per second, faceted by its HTTP or gRPC return code + fieldConfig: + defaults: + color: + mode: palette-classic + custom: + axisCenteredZero: false + axisColorMode: text + axisLabel: "" + axisPlacement: auto + barAlignment: 0 + drawStyle: line + fillOpacity: 0 + gradientMode: none + hideFrom: + legend: false + tooltip: false + viz: false + lineInterpolation: linear + lineWidth: 1 + pointSize: 5 + scaleDistribution: + type: linear + showPoints: auto + spanNulls: false + stacking: + group: A + mode: none + thresholdsStyle: + mode: "off" + mappings: [] + thresholds: + mode: absolute + steps: + - color: green + value: null + - color: red + value: 80 + unit: reqps + overrides: [] + gridPos: + h: 8 + w: 8 + x: 8 + "y": 6 + id: 58 + options: + legend: + calcs: [] + displayMode: list + placement: bottom + showLegend: true + tooltip: + mode: single + sort: none + targets: + - datasource: + type: prometheus + uid: ${datasource} + editorMode: code + expr: sum(rate({__name__=~"http_server_request_duration_seconds_count|http_server_request_duration_count",service_name="$Service",instance=~"$instance",job=~"$job"} [$__rate_interval])) by (http_response_status_code) + hide: false + legendFormat: HTTP server - {{http_response_status_code}} + range: true + refId: B + - datasource: + type: prometheus + uid: ${datasource} + editorMode: code + expr: 
sum(rate({__name__=~"rpc_server_duration_seconds_count|rpc_server_duration_count",service_name="$Service",instance=~"$instance",job=~"$job"} [$__rate_interval])) by (service_name, rpc_grpc_status_code) + hide: false + legendFormat: RPC server (status {{rpc_grpc_status_code}}) + range: true + refId: D + title: Request rate + type: timeseries + - datasource: + type: prometheus + uid: ${datasource} + description: percentage of 4xx|5xx HTTP responses or non-zero gRPC responses over the total of the requests + fieldConfig: + defaults: + color: + mode: palette-classic + custom: + axisCenteredZero: false + axisColorMode: text + axisLabel: "" + axisPlacement: auto + axisSoftMax: 1 + barAlignment: 0 + drawStyle: line + fillOpacity: 0 + gradientMode: none + hideFrom: + legend: false + tooltip: false + viz: false + lineInterpolation: linear + lineWidth: 1 + pointSize: 5 + scaleDistribution: + type: linear + showPoints: auto + spanNulls: false + stacking: + group: A + mode: none + thresholdsStyle: + mode: "off" + mappings: [] + thresholds: + mode: absolute + steps: + - color: green + value: null + - color: red + value: 80 + unit: percentunit + overrides: [] + gridPos: + h: 8 + w: 8 + x: 16 + "y": 6 + id: 59 + options: + legend: + calcs: [] + displayMode: list + placement: bottom + showLegend: true + tooltip: + mode: single + sort: none + targets: + - datasource: + type: prometheus + uid: ${datasource} + editorMode: code + expr: sum by (http_response_status_code) (rate({__name__=~"http_server_request_duration_seconds_count|http_server_request_duration_count",service_name="${Service}",http_response_status_code=~"(4|5).*",instance=~"$instance",job=~"$job"}[$__rate_interval])) / ignoring(http_response_status_code) group_left sum(rate({__name__=~"http_server_request_duration_seconds_count|http_server_request_duration_count",service_name="${Service}",instance=~"$instance",job=~"$job"}[$__rate_interval])) + hide: false + legendFormat: HTTP server - {{http_response_status_code}} + 
range: true + refId: B + - datasource: + type: prometheus + uid: ${datasource} + editorMode: code + expr: sum by (rpc_grpc_status_code) (rate({__name__=~"rpc_server_duration_seconds_count|rpc_server_duration_count",service_name="${Service}",rpc_grpc_status_code!="0",instance=~"$instance",job=~"$job"}[$__rate_interval])) / ignoring(rpc_grpc_status_code) group_left sum(rate({__name__=~"rpc_server_duration_seconds_count|rpc_server_duration_count",service_name="${Service}",instance=~"$instance",job=~"$job"}[$__rate_interval])) + hide: false + legendFormat: RPC server (status {{rpc_grpc_status_code}}) + range: true + refId: D + title: Error rate + type: timeseries + - collapsed: false + gridPos: + h: 1 + w: 24 + x: 0 + "y": 14 + id: 5 + panels: [] + repeat: Service + repeatDirection: h + title: 'Outbound: $Service' + type: row + - datasource: + type: prometheus + uid: ${datasource} + description: average and top percentiles for both HTTP and gRPC traffic + fieldConfig: + defaults: + color: + mode: palette-classic + custom: + axisCenteredZero: false + axisColorMode: text + axisLabel: "" + axisPlacement: auto + barAlignment: 0 + drawStyle: line + fillOpacity: 63 + gradientMode: opacity + hideFrom: + legend: false + tooltip: false + viz: false + lineInterpolation: linear + lineWidth: 3 + pointSize: 5 + scaleDistribution: + type: linear + showPoints: auto + spanNulls: false + stacking: + group: A + mode: none + thresholdsStyle: + mode: "off" + mappings: [] + thresholds: + mode: absolute + steps: + - color: green + value: null + - color: red + value: 80 + unit: s + overrides: [] + gridPos: + h: 8 + w: 8 + x: 0 + "y": 15 + id: 6 + options: + legend: + calcs: [] + displayMode: list + placement: bottom + showLegend: true + tooltip: + mode: single + sort: none + targets: + - datasource: + type: prometheus + uid: ${datasource} + editorMode: code + expr: histogram_quantile(0.99, sum by(service_name, le) 
(rate({__name__=~"http_client_request_duration_seconds_bucket|http_client_request_duration_bucket",service_name="$Service",instance=~"$instance",job=~"$job"}[$__rate_interval]))) + legendFormat: HTTP p99 + range: true + refId: A + - datasource: + type: prometheus + uid: ${datasource} + editorMode: code + expr: 'histogram_quantile(0.95, sum(rate({__name__=~"http_client_request_duration_seconds_bucket|http_client_request_duration_bucket",service_name="$Service",instance=~"$instance",job=~"$job"}[$__rate_interval])) by (service_name, le)) ' + hide: false + legendFormat: HTTP p95 + range: true + refId: B + - datasource: + type: prometheus + uid: ${datasource} + editorMode: code + expr: sum(rate({__name__=~"http_client_request_duration_seconds_sum|http_client_request_duration_sum",service_name="$Service",instance=~"$instance",job=~"$job"} [$__rate_interval])) / sum(rate({__name__=~"http_client_request_duration_count|http_client_request_duration_seconds_count",service_name="$Service",instance=~"$instance",job=~"$job"} [$__rate_interval])) + hide: false + legendFormat: HTTP Avg + range: true + refId: C + - datasource: + type: prometheus + uid: ${datasource} + editorMode: code + expr: sum(rate({__name__=~"rpc_client_duration_seconds_sum|rpc_client_duration_sum",service_name="$Service",instance=~"$instance",job=~"$job"} [$__rate_interval])) / sum(rate({__name__=~"rpc_client_duration_count|rpc_client_duration_seconds_count",service_name="$Service",instance=~"$instance",job=~"$job"} [$__rate_interval])) + hide: false + legendFormat: RPC Avg + range: true + refId: D + - datasource: + type: prometheus + uid: ${datasource} + editorMode: code + expr: histogram_quantile(0.99, sum by(service_name, le) (rate({__name__=~"rpc_client_duration_seconds_bucket|rpc_client_duration_bucket",service_name="$Service",instance=~"$instance",job=~"$job"}[$__rate_interval]))) + hide: false + legendFormat: RPC p99 + range: true + refId: E + - datasource: + type: prometheus + uid: ${datasource} + 
editorMode: code + expr: 'histogram_quantile(0.95, sum(rate({__name__=~"rpc_client_duration_seconds_bucket|rpc_client_duration_bucket",service_name="$Service",instance=~"$instance",job=~"$job"}[$__rate_interval])) by (service_name, le)) ' + hide: false + legendFormat: RPC p95 + range: true + refId: F + title: Duration + type: timeseries + - datasource: + type: prometheus + uid: ${datasource} + description: number of requests per second, faceted by its HTTP or gRPC return code + fieldConfig: + defaults: + color: + mode: palette-classic + custom: + axisCenteredZero: false + axisColorMode: text + axisLabel: "" + axisPlacement: auto + barAlignment: 0 + drawStyle: line + fillOpacity: 0 + gradientMode: none + hideFrom: + legend: false + tooltip: false + viz: false + lineInterpolation: linear + lineWidth: 1 + pointSize: 5 + scaleDistribution: + type: linear + showPoints: auto + spanNulls: false + stacking: + group: A + mode: none + thresholdsStyle: + mode: "off" + mappings: [] + thresholds: + mode: absolute + steps: + - color: green + value: null + - color: red + value: 80 + unit: reqps + overrides: [] + gridPos: + h: 8 + w: 8 + x: 8 + "y": 15 + id: 8 + options: + legend: + calcs: [] + displayMode: list + placement: bottom + showLegend: true + tooltip: + mode: single + sort: none + targets: + - datasource: + type: prometheus + uid: ${datasource} + editorMode: code + expr: sum(rate({__name__=~"http_client_request_duration_count|http_client_request_duration_seconds_count",service_name="$Service",instance=~"$instance",job=~"$job"} [$__rate_interval])) by (service_name, http_response_status_code) + legendFormat: HTTP client - {{http_response_status_code}} + range: true + refId: A + - datasource: + type: prometheus + uid: ${datasource} + editorMode: code + expr: sum(rate({__name__=~"rpc_client_duration_count|rpc_client_duration_seconds_count",service_name="$Service",instance=~"$instance",job=~"$job"} [$__rate_interval])) by (service_name, rpc_grpc_status_code) + hide: false + 
legendFormat: RPC client (status {{rpc_grpc_status_code}}) + range: true + refId: C + title: Rate + type: timeseries + - datasource: + type: prometheus + uid: ${datasource} + description: percentage of 5xx HTTP responses or non-zero gRPC responses over the total of the requests + fieldConfig: + defaults: + color: + mode: palette-classic + custom: + axisCenteredZero: false + axisColorMode: text + axisLabel: "" + axisPlacement: auto + axisSoftMax: 1 + barAlignment: 0 + drawStyle: line + fillOpacity: 0 + gradientMode: none + hideFrom: + legend: false + tooltip: false + viz: false + lineInterpolation: linear + lineWidth: 1 + pointSize: 5 + scaleDistribution: + type: linear + showPoints: auto + spanNulls: false + stacking: + group: A + mode: none + thresholdsStyle: + mode: "off" + mappings: [] + thresholds: + mode: absolute + steps: + - color: green + value: null + - color: red + value: 80 + unit: percentunit + overrides: + - __systemRef: hideSeriesFrom + matcher: + id: byNames + options: + mode: exclude + names: + - HTTP server - 500 + prefix: 'All except:' + readOnly: true + properties: + - id: custom.hideFrom + value: + legend: false + tooltip: false + viz: false + gridPos: + h: 8 + w: 8 + x: 16 + "y": 15 + id: 73 + options: + legend: + calcs: [] + displayMode: list + placement: bottom + showLegend: true + tooltip: + mode: single + sort: none + targets: + - datasource: + type: prometheus + uid: ${datasource} + editorMode: code + expr: sum by (http_response_status_code) (rate({__name__=~"http_client_request_duration_count|http_client_request_duration_seconds_count",service_name="$Service",http_response_status_code=~"5.*",instance=~"$instance",job=~"$job"}[$__rate_interval])) / ignoring(http_response_status_code) group_left sum(rate({__name__=~"http_client_request_duration_count|http_client_request_duration_seconds_count",service_name="$Service",instance=~"$instance",job=~"$job"}[$__rate_interval])) + hide: false + legendFormat: HTTP client - 
{{http_response_status_code}} + range: true + refId: B + - datasource: + type: prometheus + uid: ${datasource} + editorMode: code + expr: sum by (rpc_grpc_status_code) (rate({__name__=~"rpc_client_duration_count|rpc_client_duration_seconds_count",service_name="$Service",rpc_grpc_status_code!="0",instance=~"$instance",job=~"$job"}[$__rate_interval])) / ignoring(rpc_grpc_status_code) group_left sum(rate({__name__=~"rpc_client_duration_count|rpc_client_duration_seconds_count",service_name="$Service",instance=~"$instance",job=~"$job"}[$__rate_interval])) + hide: false + legendFormat: RPC client (status {{rpc_grpc_status_code}}) + range: true + refId: D + title: Error rate + type: timeseries + refresh: 30s + schemaVersion: 38 + tags: + - ebpf + - red + - http + - grpc + - https + - beyla + templating: + list: + - current: + selected: false + text: Prometheus + value: Prometheus + description: Source of the metrics (e.g. Prometheus source) + hide: 0 + includeAll: false + label: Data source + multi: false + name: datasource + options: [] + query: prometheus + queryValue: "" + refresh: 1 + regex: "" + skipUrlSync: false + type: datasource + - current: + selected: true + text: + - All + value: + - $__all + datasource: + type: prometheus + uid: ${datasource} + definition: label_values(service_name) + hide: 0 + includeAll: true + label: Service + multi: true + name: Service + options: [] + query: + query: label_values(service_name) + refId: PrometheusVariableQueryEditor-VariableQuery + refresh: 2 + regex: "" + skipUrlSync: false + sort: 5 + type: query + - allValue: .+ + current: + selected: true + text: + - All + value: + - $__all + datasource: + type: prometheus + uid: ${datasource} + definition: label_values(job) + hide: 0 + includeAll: true + label: Job + multi: true + name: job + options: [] + query: + query: label_values(job) + refId: PrometheusVariableQueryEditor-VariableQuery + refresh: 2 + regex: "" + skipUrlSync: false + sort: 0 + type: query + - allValue: .+ + 
current: + selected: true + text: + - All + value: + - $__all + datasource: + type: prometheus + uid: ${datasource} + definition: label_values(instance) + hide: 0 + includeAll: true + label: Instance + multi: true + name: instance + options: [] + query: + query: label_values(instance) + refId: PrometheusVariableQueryEditor-VariableQuery + refresh: 2 + regex: "" + skipUrlSync: false + sort: 0 + type: query + - allValue: .+ + datasource: + type: prometheus + uid: ${datasource} + includeAll: true + label: Job + multi: true + name: job + query: label_values(up{job!="kubelet"}, job) + refresh: 2 + sort: 1 + type: query + - allValue: .+ + datasource: + type: prometheus + uid: ${datasource} + includeAll: true + label: Instance + multi: true + name: instance + query: label_values(up{job=~"$job"}, instance) + refresh: 2 + sort: 1 + type: query + time: + from: now-30m + to: now + timepicker: {} + timezone: "" + title: Beyla RED Metrics + uid: 422313feb73f77366e56faa7870b77ec + version: 3 + weekStart: "" + - apiVersion: grizzly.grafana.com/v1alpha1 + kind: Dashboard + metadata: + folder: beyla + name: a0d490796b9cf61d5e83aa79cb3463ee + spec: + __elements: {} + __inputs: + - description: "" + label: grafanacloud-prom + name: DS_GRAFANACLOUD_PROM + pluginId: prometheus + pluginName: Prometheus + type: datasource + __requires: + - id: grafana + name: Grafana + type: grafana + version: 11.3.0-75696 + - id: prometheus + name: Prometheus + type: datasource + version: 1.0.0 + - id: timeseries + name: Time series + type: panel + version: "" + annotations: + list: + - builtIn: 1 + datasource: + type: grafana + uid: -- Grafana -- + enable: true + hide: true + iconColor: rgba(0, 211, 255, 1) + name: Annotations & Alerts + type: dashboard + description: Information about the processes belonging to the applications instrumented by Beyla + editable: false + fiscalYearStartMonth: 0 + graphTooltip: 0 + id: null + links: [] + panels: + - datasource: + type: prometheus + uid: ${datasource} + 
description: CPU utilization percentage for each instrumented process + fieldConfig: + defaults: + color: + mode: palette-classic + custom: + axisBorderShow: false + axisCenteredZero: false + axisColorMode: text + axisLabel: "" + axisPlacement: auto + barAlignment: 0 + barWidthFactor: 0.6 + drawStyle: line + fillOpacity: 0 + gradientMode: none + hideFrom: + legend: false + tooltip: false + viz: false + insertNulls: false + lineInterpolation: linear + lineWidth: 1 + pointSize: 5 + scaleDistribution: + type: linear + showPoints: auto + spanNulls: false + stacking: + group: A + mode: none + thresholdsStyle: + mode: "off" + mappings: [] + thresholds: + mode: absolute + steps: + - color: green + value: null + - color: red + value: 80 + unit: percentunit + overrides: [] + gridPos: + h: 8 + w: 24 + x: 0 + "y": 0 + id: 2 + options: + legend: + calcs: [] + displayMode: list + placement: bottom + showLegend: true + tooltip: + mode: single + sort: none + pluginVersion: 11.3.0-75696 + targets: + - datasource: + type: prometheus + uid: ${datasource} + editorMode: code + expr: sum(rate({__name__=~"process_cpu_time.*",process_pid="",job=~"$job",instance=~"$instance"}[$__rate_interval]) + on(instance) group_left(k8s_pod_name,k8s_namespace_name,process_command,process_pid) (0 * target_info{process_pid!="",job=~"$job",instance=~"$instance"})) by(k8s_pod_name,k8s_namespace_name,process_command,process_pid) + legendFormat: '{{process_command}}/{{process_pid}} (Pod: {{k8s_namespace_name}}/{{k8s_pod_name}})' + range: true + refId: A + - datasource: + type: prometheus + uid: ${datasource} + editorMode: code + expr: sum(rate({__name__=~"process_cpu_time.*",process_pid!="",job=~"$job",instance=~"$instance"}[$__rate_interval])) by(k8s_pod_name,k8s_namespace_name,process_command,process_pid) + hide: false + instant: false + legendFormat: '{{process_command}}/{{process_pid}} (Pod: {{k8s_namespace_name}}/{{k8s_pod_name}})' + range: true + refId: B + title: CPU percent + type: timeseries + - 
datasource: + type: prometheus + uid: ${datasource} + description: Physical memory usage for each instrumented process + fieldConfig: + defaults: + color: + mode: palette-classic + custom: + axisBorderShow: false + axisCenteredZero: false + axisColorMode: text + axisLabel: "" + axisPlacement: auto + barAlignment: 0 + barWidthFactor: 0.6 + drawStyle: line + fillOpacity: 0 + gradientMode: none + hideFrom: + legend: false + tooltip: false + viz: false + insertNulls: false + lineInterpolation: linear + lineWidth: 1 + pointSize: 5 + scaleDistribution: + type: linear + showPoints: auto + spanNulls: false + stacking: + group: A + mode: none + thresholdsStyle: + mode: "off" + mappings: [] + thresholds: + mode: absolute + steps: + - color: green + value: null + - color: red + value: 80 + unit: bytes + overrides: [] + gridPos: + h: 8 + w: 12 + x: 0 + "y": 8 + id: 3 + options: + legend: + calcs: [] + displayMode: list + placement: bottom + showLegend: true + tooltip: + mode: single + sort: none + pluginVersion: 11.3.0-75696 + targets: + - datasource: + type: prometheus + uid: ${datasource} + editorMode: code + expr: sum({__name__=~"process_memory_usage.*",process_pid="",job=~"$job",instance=~"$instance"} + on(instance) group_left(k8s_pod_name,k8s_namespace_name,process_command,process_pid) (0 * target_info{process_pid!="",job=~"$job",instance=~"$instance"})) by(k8s_pod_name,k8s_namespace_name,process_command,process_pid) + legendFormat: '{{process_command}}/{{process_pid}} (Pod: {{k8s_namespace_name}}/{{k8s_pod_name}})' + range: true + refId: A + - datasource: + type: prometheus + uid: ${datasource} + editorMode: code + expr: sum({__name__=~"process_memory_usage.*",process_pid!="",job=~"$job",instance=~"$instance"}) by(k8s_pod_name,k8s_namespace_name,process_command,process_pid) + hide: false + legendFormat: '{{process_command}}/{{process_pid}} (Pod: {{k8s_namespace_name}}/{{k8s_pod_name}})' + range: true + refId: B + title: Physical memory + type: timeseries + - datasource: 
+ type: prometheus + uid: ${datasource} + description: Virtual memory usage for each instrumented process + fieldConfig: + defaults: + color: + mode: palette-classic + custom: + axisBorderShow: false + axisCenteredZero: false + axisColorMode: text + axisLabel: "" + axisPlacement: auto + barAlignment: 0 + barWidthFactor: 0.6 + drawStyle: line + fillOpacity: 0 + gradientMode: none + hideFrom: + legend: false + tooltip: false + viz: false + insertNulls: false + lineInterpolation: linear + lineWidth: 1 + pointSize: 5 + scaleDistribution: + type: linear + showPoints: auto + spanNulls: false + stacking: + group: A + mode: none + thresholdsStyle: + mode: "off" + mappings: [] + thresholds: + mode: absolute + steps: + - color: green + value: null + - color: red + value: 80 + unit: bytes + overrides: [] + gridPos: + h: 8 + w: 12 + x: 12 + "y": 8 + id: 4 + options: + legend: + calcs: [] + displayMode: list + placement: bottom + showLegend: true + tooltip: + mode: single + sort: none + pluginVersion: 11.3.0-75696 + targets: + - datasource: + type: prometheus + uid: ${datasource} + editorMode: code + expr: sum({__name__=~"process_memory_virtual.*",process_pid="",job=~"$job",instance=~"$instance"} + on(instance) group_left(k8s_pod_name,k8s_namespace_name,process_command,process_pid) (0 * target_info{process_pid!="",job=~"$job",instance=~"$instance"})) by(k8s_pod_name,k8s_namespace_name,process_command,process_pid) + legendFormat: '{{process_command}}/{{process_pid}} (Pod: {{k8s_namespace_name}}/{{k8s_pod_name}})' + range: true + refId: A + - datasource: + type: prometheus + uid: ${datasource} + editorMode: code + expr: sum({__name__=~"process_memory_virtual.*",process_pid!="",job=~"$job",instance=~"$instance"}) by(k8s_pod_name,k8s_namespace_name,process_command,process_pid) + hide: false + legendFormat: '{{process_command}}/{{process_pid}} (Pod: {{k8s_namespace_name}}/{{k8s_pod_name}})' + range: true + refId: B + title: Virtual memory + type: timeseries + - datasource: + type: 
prometheus + uid: ${datasource} + description: Disk read operations rate for each instrumented process + fieldConfig: + defaults: + color: + mode: palette-classic + custom: + axisBorderShow: false + axisCenteredZero: false + axisColorMode: text + axisLabel: "" + axisPlacement: auto + barAlignment: 0 + barWidthFactor: 0.6 + drawStyle: line + fillOpacity: 0 + gradientMode: none + hideFrom: + legend: false + tooltip: false + viz: false + insertNulls: false + lineInterpolation: linear + lineWidth: 1 + pointSize: 5 + scaleDistribution: + type: linear + showPoints: auto + spanNulls: false + stacking: + group: A + mode: none + thresholdsStyle: + mode: "off" + mappings: [] + thresholds: + mode: absolute + steps: + - color: green + value: null + - color: red + value: 80 + unit: binBps + overrides: [] + gridPos: + h: 8 + w: 12 + x: 0 + "y": 16 + id: 5 + options: + legend: + calcs: [] + displayMode: list + placement: bottom + showLegend: true + tooltip: + mode: single + sort: none + pluginVersion: 11.3.0-75696 + targets: + - datasource: + type: prometheus + uid: ${datasource} + editorMode: code + expr: sum(rate({__name__=~"process_disk_io.*",process_pid="",disk_io_direction="read",job=~"$job",instance=~"$instance"}[$__rate_interval]) + on(instance) group_left(k8s_pod_name,k8s_namespace_name,process_command,process_pid) (0 * target_info{process_pid!="",job=~"$job",instance=~"$instance"})) by(k8s_pod_name,k8s_namespace_name,process_command,process_pid) + legendFormat: '{{process_command}}/{{process_pid}} (Pod: {{k8s_namespace_name}}/{{k8s_pod_name}})' + range: true + refId: A + - datasource: + type: prometheus + uid: ${datasource} + editorMode: code + expr: sum(rate({__name__=~"process_disk_io.*",process_pid!="",disk_io_direction="read",job=~"$job",instance=~"$instance"}[$__rate_interval])) by(k8s_pod_name,k8s_namespace_name,process_command,process_pid) + hide: false + legendFormat: '{{process_command}}/{{process_pid}} (Pod: {{k8s_namespace_name}}/{{k8s_pod_name}})' + range: 
true + refId: B + title: Disk I/O read + type: timeseries + - datasource: + type: prometheus + uid: ${datasource} + description: Disk write operations rate for each instrumented process + fieldConfig: + defaults: + color: + mode: palette-classic + custom: + axisBorderShow: false + axisCenteredZero: false + axisColorMode: text + axisLabel: "" + axisPlacement: auto + barAlignment: 0 + barWidthFactor: 0.6 + drawStyle: line + fillOpacity: 0 + gradientMode: none + hideFrom: + legend: false + tooltip: false + viz: false + insertNulls: false + lineInterpolation: linear + lineWidth: 1 + pointSize: 5 + scaleDistribution: + type: linear + showPoints: auto + spanNulls: false + stacking: + group: A + mode: none + thresholdsStyle: + mode: "off" + mappings: [] + thresholds: + mode: absolute + steps: + - color: green + value: null + - color: red + value: 80 + unit: binBps + overrides: [] + gridPos: + h: 8 + w: 12 + x: 12 + "y": 16 + id: 7 + options: + legend: + calcs: [] + displayMode: list + placement: bottom + showLegend: true + tooltip: + mode: single + sort: none + pluginVersion: 11.3.0-75696 + targets: + - datasource: + type: prometheus + uid: ${datasource} + editorMode: code + expr: sum(rate({__name__=~"process_disk_io.*",process_pid="",disk_io_direction="write",job=~"$job",instance=~"$instance"}[$__rate_interval]) + on(instance) group_left(k8s_pod_name,k8s_namespace_name,process_command,process_pid) (0 * target_info{process_pid!="",job=~"$job",instance=~"$instance"})) by(k8s_pod_name,k8s_namespace_name,process_command,process_pid) + legendFormat: '{{process_command}}/{{process_pid}} (Pod: {{k8s_namespace_name}}/{{k8s_pod_name}})' + range: true + refId: A + - datasource: + type: prometheus + uid: ${datasource} + editorMode: code + expr: sum(rate({__name__=~"process_disk_io.*",process_pid!="",disk_io_direction="write",job=~"$job",instance=~"$instance"}[$__rate_interval])) by(k8s_pod_name,k8s_namespace_name,process_command,process_pid) + hide: false + legendFormat: 
'{{process_command}}/{{process_pid}} (Pod: {{k8s_namespace_name}}/{{k8s_pod_name}})' + range: true + refId: B + title: Disk I/O write + type: timeseries + - datasource: + type: prometheus + uid: ${datasource} + description: Network receive throughput for each instrumented process + fieldConfig: + defaults: + color: + mode: palette-classic + custom: + axisBorderShow: false + axisCenteredZero: false + axisColorMode: text + axisLabel: "" + axisPlacement: auto + barAlignment: 0 + barWidthFactor: 0.6 + drawStyle: line + fillOpacity: 0 + gradientMode: none + hideFrom: + legend: false + tooltip: false + viz: false + insertNulls: false + lineInterpolation: linear + lineWidth: 1 + pointSize: 5 + scaleDistribution: + type: linear + showPoints: auto + spanNulls: false + stacking: + group: A + mode: none + thresholdsStyle: + mode: "off" + mappings: [] + thresholds: + mode: absolute + steps: + - color: green + value: null + - color: red + value: 80 + unit: binBps + overrides: [] + gridPos: + h: 8 + w: 12 + x: 0 + "y": 24 + id: 6 + options: + legend: + calcs: [] + displayMode: list + placement: bottom + showLegend: true + tooltip: + mode: single + sort: none + pluginVersion: 11.3.0-75696 + targets: + - datasource: + type: prometheus + uid: ${datasource} + editorMode: code + expr: sum(rate({__name__=~"process_network_io.*",process_pid="",network_io_direction="receive",job=~"$job",instance=~"$instance"}[$__rate_interval]) + on(instance) group_left(k8s_pod_name,k8s_namespace_name,process_command,process_pid) (0 * target_info{process_pid!="",job=~"$job",instance=~"$instance"})) by(k8s_pod_name,k8s_namespace_name,process_command,process_pid) + legendFormat: '{{process_command}}/{{process_pid}} (Pod: {{k8s_namespace_name}}/{{k8s_pod_name}})' + range: true + refId: A + - datasource: + type: prometheus + uid: ${datasource} + editorMode: code + expr: 
sum(rate({__name__=~"process_network_io.*",process_pid!="",network_io_direction="receive",job=~"$job",instance=~"$instance"}[$__rate_interval])) by(k8s_pod_name,k8s_namespace_name,process_command,process_pid) + hide: false + legendFormat: '{{process_command}}/{{process_pid}} (Pod: {{k8s_namespace_name}}/{{k8s_pod_name}})' + range: true + refId: B + title: Network I/O receive + type: timeseries + - datasource: + type: prometheus + uid: ${datasource} + description: Network transmit throughput for each instrumented process + fieldConfig: + defaults: + color: + mode: palette-classic + custom: + axisBorderShow: false + axisCenteredZero: false + axisColorMode: text + axisLabel: "" + axisPlacement: auto + barAlignment: 0 + barWidthFactor: 0.6 + drawStyle: line + fillOpacity: 0 + gradientMode: none + hideFrom: + legend: false + tooltip: false + viz: false + insertNulls: false + lineInterpolation: linear + lineWidth: 1 + pointSize: 5 + scaleDistribution: + type: linear + showPoints: auto + spanNulls: false + stacking: + group: A + mode: none + thresholdsStyle: + mode: "off" + mappings: [] + thresholds: + mode: absolute + steps: + - color: green + value: null + - color: red + value: 80 + unit: binBps + overrides: [] + gridPos: + h: 8 + w: 12 + x: 12 + "y": 24 + id: 8 + options: + legend: + calcs: [] + displayMode: list + placement: bottom + showLegend: true + tooltip: + mode: single + sort: none + pluginVersion: 11.3.0-75696 + targets: + - datasource: + type: prometheus + uid: ${datasource} + editorMode: code + expr: sum(rate({__name__=~"process_network_io.*",process_pid="",network_io_direction="transmit",job=~"$job",instance=~"$instance"}[$__rate_interval]) + on(instance) group_left(k8s_pod_name,k8s_namespace_name,process_command,process_pid) (0 * target_info{process_pid!="",job=~"$job",instance=~"$instance"})) by(k8s_pod_name,k8s_namespace_name,process_command,process_pid) + legendFormat: '{{process_command}}/{{process_pid}} (Pod: {{k8s_namespace_name}}/{{k8s_pod_name}})' 
+ range: true + refId: A + - datasource: + type: prometheus + uid: ${datasource} + editorMode: code + expr: sum(rate({__name__=~"process_network_io.*",process_pid!="",network_io_direction="transmit",job=~"$job",instance=~"$instance"}[$__rate_interval])) by(k8s_pod_name,k8s_namespace_name,process_command,process_pid) + hide: false + legendFormat: '{{process_command}}/{{process_pid}} (Pod: {{k8s_namespace_name}}/{{k8s_pod_name}})' + range: true + refId: B + title: Network I/O transmit + type: timeseries + schemaVersion: 39 + tags: + - process + - beyla + templating: + list: + - label: Data source + name: datasource + query: prometheus + regex: "" + type: datasource + - allValue: .+ + datasource: + type: prometheus + uid: ${datasource} + includeAll: true + label: Job + multi: true + name: job + query: label_values(up{job!="kubelet"}, job) + refresh: 2 + sort: 1 + type: query + - allValue: .+ + datasource: + type: prometheus + uid: ${datasource} + includeAll: true + label: Instance + multi: true + name: instance + query: label_values(up{job=~"$job"}, instance) + refresh: 2 + sort: 1 + type: query + time: + from: now-15m + to: now + timepicker: {} + timezone: browser + title: Beyla process metrics + uid: a0d490796b9cf61d5e83aa79cb3463ee + version: 40 + weekStart: "" + - apiVersion: grizzly.grafana.com/v1alpha1 + kind: Dashboard + metadata: + folder: beyla + name: b0d6558d68f5f420831661892b6a2207 + spec: + annotations: + list: + - builtIn: 1 + datasource: + type: grafana + uid: -- Grafana -- + enable: true + hide: true + iconColor: rgba(0, 211, 255, 1) + name: Annotations & Alerts + type: dashboard + editable: false + fiscalYearStartMonth: 0 + graphTooltip: 0 + id: 72 + links: [] + panels: + - collapsed: false + gridPos: + h: 1 + w: 24 + x: 0 + "y": 0 + id: 12 + panels: [] + title: Overview + type: row + - datasource: + type: prometheus + uid: ${datasource} + description: Current version of Beyla running in the cluster + fieldConfig: + defaults: + color: + mode: 
thresholds + mappings: [] + thresholds: + mode: absolute + steps: + - color: green + value: null + - color: red + value: 80 + overrides: + - matcher: + id: byName + options: "" + properties: [] + gridPos: + h: 3 + w: 3 + x: 0 + "y": 1 + id: 2 + options: + colorMode: value + graphMode: area + justifyMode: auto + orientation: auto + percentChangeColorMode: standard + reduceOptions: + calcs: + - lastNotNull + fields: /^version$/ + values: false + showPercentChange: false + textMode: auto + wideLayout: true + pluginVersion: 11.3.0-74868 + targets: + - datasource: + type: prometheus + uid: ${datasource} + editorMode: code + expr: beyla_internal_build_info{job=~"$job",instance=~"$instance"} + format: table + instant: true + legendFormat: __auto + range: false + refId: A + title: Beyla Version + type: stat + - datasource: + type: prometheus + uid: ${datasource} + description: Number of Beyla pods currently running in the cluster + fieldConfig: + defaults: + color: + mode: thresholds + mappings: [] + thresholds: + mode: absolute + steps: + - color: green + value: null + - color: red + value: 80 + overrides: [] + gridPos: + h: 3 + w: 3 + x: 3 + "y": 1 + id: 5 + options: + colorMode: value + graphMode: area + justifyMode: auto + orientation: auto + percentChangeColorMode: standard + reduceOptions: + calcs: + - lastNotNull + fields: "" + values: false + showPercentChange: false + textMode: auto + wideLayout: true + pluginVersion: 11.3.0-74868 + targets: + - datasource: + type: prometheus + uid: ${datasource} + editorMode: code + expr: kube_daemonset_status_number_ready{daemonset="beyla",job=~"$job",instance=~"$instance"} + instant: false + legendFormat: __auto + range: true + refId: A + title: 'DaemonSet: Running pods' + type: stat + - datasource: + type: prometheus + uid: ${datasource} + description: Total number of OpenTelemetry metrics exported by Beyla + fieldConfig: + defaults: + color: + mode: thresholds + mappings: [] + thresholds: + mode: absolute + steps: + - color: 
green + value: null + - color: red + value: 80 + overrides: [] + gridPos: + h: 3 + w: 3 + x: 6 + "y": 1 + id: 3 + options: + colorMode: value + graphMode: area + justifyMode: auto + orientation: auto + percentChangeColorMode: standard + reduceOptions: + calcs: + - lastNotNull + fields: "" + values: false + showPercentChange: false + textMode: auto + wideLayout: true + pluginVersion: 11.3.0-74868 + targets: + - datasource: + type: prometheus + uid: ${datasource} + disableTextWrap: false + editorMode: builder + exemplar: false + expr: sum by(cluster) (beyla_otel_metric_exports_total{job=~"$job",instance=~"$instance"}) + fullMetaSearch: false + includeNullMetadata: true + instant: false + legendFormat: __auto + range: true + refId: A + useBackend: false + title: OTEL metric exports + type: stat + - datasource: + type: prometheus + uid: ${datasource} + description: Total number of OpenTelemetry traces exported by Beyla + fieldConfig: + defaults: + color: + mode: thresholds + mappings: [] + thresholds: + mode: absolute + steps: + - color: green + value: null + - color: red + value: 80 + overrides: [] + gridPos: + h: 3 + w: 3 + x: 9 + "y": 1 + id: 4 + options: + colorMode: value + graphMode: area + justifyMode: auto + orientation: auto + percentChangeColorMode: standard + reduceOptions: + calcs: + - lastNotNull + fields: "" + values: false + showPercentChange: false + textMode: auto + wideLayout: true + pluginVersion: 11.3.0-74868 + targets: + - datasource: + type: prometheus + uid: ${datasource} + editorMode: code + exemplar: false + expr: sum by(cluster) (beyla_otel_trace_exports_total{job=~"$job",instance=~"$instance"}) + instant: false + legendFormat: __auto + range: true + refId: A + title: OTEL trace exports + type: stat + - datasource: + type: prometheus + uid: ${datasource} + description: List of all processes discovered and instrumented by Beyla + fieldConfig: + defaults: + color: + mode: thresholds + custom: + align: auto + cellOptions: + type: auto + inspect: 
false + mappings: [] + thresholds: + mode: absolute + steps: + - color: green + value: null + - color: red + value: 80 + overrides: + - matcher: + id: byRegexp + options: ^(?!.*process|Value).*$ + properties: + - id: custom.hidden + value: true + gridPos: + h: 17 + w: 8 + x: 0 + "y": 4 + id: 1 + options: + cellHeight: sm + footer: + countRows: false + fields: "" + reducer: + - sum + show: false + showHeader: true + sortBy: + - desc: false + displayName: process_name + pluginVersion: 11.3.0-74868 + targets: + - datasource: + type: prometheus + uid: ${datasource} + disableTextWrap: false + editorMode: builder + exemplar: false + expr: sum by(process_name) (beyla_instrumented_processes{job=~"$job",instance=~"$instance"}) + format: table + fullMetaSearch: false + includeNullMetadata: true + instant: true + interval: "" + legendFormat: __auto + range: false + refId: A + useBackend: false + title: Discovered Processes + type: table + - datasource: + type: prometheus + uid: ${datasource} + description: Memory usage of Beyla pods over time + fieldConfig: + defaults: + color: + mode: palette-classic + custom: + axisBorderShow: false + axisCenteredZero: false + axisColorMode: text + axisLabel: "" + axisPlacement: auto + barAlignment: 0 + barWidthFactor: 0.6 + drawStyle: line + fillOpacity: 0 + gradientMode: none + hideFrom: + legend: false + tooltip: false + viz: false + insertNulls: false + lineInterpolation: linear + lineWidth: 1 + pointSize: 5 + scaleDistribution: + type: linear + showPoints: auto + spanNulls: false + stacking: + group: A + mode: none + thresholdsStyle: + mode: "off" + mappings: [] + thresholds: + mode: absolute + steps: + - color: green + value: null + - color: red + value: 80 + unit: decbytes + overrides: [] + gridPos: + h: 8 + w: 16 + x: 8 + "y": 4 + id: 8 + options: + legend: + calcs: [] + displayMode: list + placement: bottom + showLegend: true + tooltip: + mode: single + sort: none + pluginVersion: 11.3.0-74868 + targets: + - datasource: + type: 
prometheus + uid: ${datasource} + disableTextWrap: false + editorMode: builder + exemplar: false + expr: process_memory_usage_bytes{job="default/beyla",job=~"$job",instance=~"$instance"} + format: time_series + fullMetaSearch: false + hide: false + includeNullMetadata: true + instant: false + interval: "" + legendFormat: '{{k8s_pod_name}}' + range: true + refId: A + useBackend: false + title: Memory Usage + type: timeseries + - datasource: + type: prometheus + uid: ${datasource} + description: CPU utilization of Beyla processes over time + fieldConfig: + defaults: + color: + mode: palette-classic + custom: + axisBorderShow: false + axisCenteredZero: false + axisColorMode: text + axisLabel: "" + axisPlacement: auto + barAlignment: 0 + barWidthFactor: 0.6 + drawStyle: line + fillOpacity: 0 + gradientMode: none + hideFrom: + legend: false + tooltip: false + viz: false + insertNulls: false + lineInterpolation: linear + lineWidth: 1 + pointSize: 5 + scaleDistribution: + type: linear + showPoints: auto + spanNulls: false + stacking: + group: A + mode: none + thresholdsStyle: + mode: "off" + mappings: [] + thresholds: + mode: absolute + steps: + - color: green + value: null + - color: red + value: 80 + unit: percentunit + overrides: [] + gridPos: + h: 9 + w: 16 + x: 8 + "y": 12 + id: 9 + options: + legend: + calcs: [] + displayMode: list + placement: bottom + showLegend: true + tooltip: + mode: single + sort: none + pluginVersion: 11.3.0-74868 + targets: + - datasource: + type: prometheus + uid: ${datasource} + disableTextWrap: false + editorMode: builder + exemplar: false + expr: sum by(process_command) (process_cpu_utilization_ratio{job="default/beyla",job=~"$job",instance=~"$instance"}) + format: time_series + fullMetaSearch: false + hide: false + includeNullMetadata: true + instant: false + interval: "" + legendFormat: '{{process_command}}' + range: true + refId: A + useBackend: false + title: CPU Usage + type: timeseries + - collapsed: false + gridPos: + h: 1 + w: 24 
+ x: 0 + "y": 21 + id: 11 + panels: [] + title: Logs + type: row + - datasource: + type: loki + uid: grafanacloud-logs + fieldConfig: + defaults: {} + overrides: [] + gridPos: + h: 9 + w: 24 + x: 0 + "y": 22 + id: 6 + options: + dedupStrategy: none + enableLogDetails: true + prettifyLogMessage: false + showCommonLabels: false + showLabels: false + showTime: false + sortOrder: Descending + wrapLogMessage: false + pluginVersion: 11.3.0-74868 + targets: + - datasource: + type: loki + uid: grafanacloud-logs + editorMode: code + expr: '{job="default/beyla"} | logfmt | component!=`BPFLogger`' + queryType: range + refId: A + title: Application Logs + type: logs + - datasource: + type: loki + uid: grafanacloud-logs + fieldConfig: + defaults: {} + overrides: [] + gridPos: + h: 9 + w: 24 + x: 0 + "y": 31 + id: 10 + options: + dedupStrategy: none + enableLogDetails: true + prettifyLogMessage: false + showCommonLabels: false + showLabels: false + showTime: false + sortOrder: Descending + wrapLogMessage: false + pluginVersion: 11.2.0-73179 + targets: + - datasource: + type: loki + uid: grafanacloud-logs + editorMode: code + expr: '{job="default/beyla"} | logfmt | component=`BPFLogger`' + queryType: range + refId: A + title: BPF Logs + type: logs + - datasource: + type: loki + uid: grafanacloud-logs + fieldConfig: + defaults: {} + overrides: [] + gridPos: + h: 5 + w: 24 + x: 0 + "y": 40 + id: 7 + options: + dedupStrategy: none + enableLogDetails: true + prettifyLogMessage: false + showCommonLabels: false + showLabels: false + showTime: false + sortOrder: Descending + wrapLogMessage: false + pluginVersion: 11.2.0-73179 + targets: + - datasource: + type: loki + uid: grafanacloud-logs + editorMode: code + expr: '{job="default/beyla"} |= `ERROR`' + queryType: range + refId: A + title: Application Errors + type: logs + preload: false + refresh: 10s + schemaVersion: 39 + tags: + - beyla + - ebpf + templating: + list: + - label: Data source + name: datasource + query: prometheus + 
regex: "" + type: datasource + - allValue: .+ + datasource: + type: prometheus + uid: ${datasource} + includeAll: true + label: Job + multi: true + name: job + query: label_values(up{job!="kubelet"}, job) + refresh: 2 + sort: 1 + type: query + - allValue: .+ + datasource: + type: prometheus + uid: ${datasource} + includeAll: true + label: Instance + multi: true + name: instance + query: label_values(up{job=~"$job"}, instance) + refresh: 2 + sort: 1 + type: query + - baseFilters: [] + datasource: + type: loki + uid: grafanacloud-logs + filters: [] + name: Filters + type: adhoc + time: + from: now-6h + to: now + timepicker: {} + timezone: browser + title: Beyla + uid: b0d6558d68f5f420831661892b6a2207 + version: 19 + weekStart: "" + datasources: {} + folders: + apiVersion: grizzly.grafana.com/v1alpha1 + kind: DashboardFolder + metadata: + name: beyla + spec: + title: beyla +grafanaDashboardFolder: beyla +grafanaDashboards: + application.json: + annotations: + list: + - builtIn: 1 + datasource: + type: grafana + uid: -- Grafana -- + enable: true + hide: true + iconColor: rgba(0, 211, 255, 1) + name: Annotations & Alerts + type: dashboard + description: HTTP and gRPC RED metrics visualization for Grafana Beyla + editable: false + fiscalYearStartMonth: 0 + graphTooltip: 0 + id: 38 + links: [] + liveNow: false + panels: + - datasource: + type: prometheus + uid: ${datasource} + description: the slowest HTTP routes sorted by response time percentile + fieldConfig: + defaults: + color: + mode: thresholds + custom: + align: auto + cellOptions: + type: json-view + filterable: true + inspect: false + mappings: [] + thresholds: + mode: absolute + steps: + - color: green + value: null + - color: red + value: 80 + unit: s + overrides: + - matcher: + id: byName + options: Time + properties: + - id: custom.hidden + value: true + - matcher: + id: byName + options: Value + properties: + - id: custom.hidden + value: true + - matcher: + id: byName + options: Duration (ms) + properties: 
+ - id: decimals + value: 0 + gridPos: + h: 5 + w: 13 + x: 0 + "y": 0 + id: 35 + options: + cellHeight: sm + footer: + countRows: false + fields: "" + reducer: + - sum + show: false + frameIndex: 2 + showHeader: true + sortBy: + - desc: true + displayName: Duration (ms) + pluginVersion: 10.1.0-57417pre + targets: + - datasource: + type: prometheus + uid: ${datasource} + editorMode: code + expr: sort_desc(topk by(http_route, service_name) (5, max by (http_route, service_name) (histogram_quantile(0.95, (sum by(http_route, service_name, le) (rate({__name__=~"http_server_request_duration_seconds_bucket|http_server_request_duration_bucket",instance=~"$instance",job=~"$job"}[$__rate_interval]))))))) + format: table + hide: false + instant: true + legendFormat: '{{service_name}} - {{http_route}}' + range: false + refId: A + title: Slowest HTTP routes (P95) + transformations: + - id: calculateField + options: + alias: Duration (ms) + binary: + left: Value + operator: '*' + reducer: sum + right: "1000" + mode: binary + reduce: + reducer: sum + replaceFields: false + - id: organize + options: + excludeByName: {} + indexByName: + Duration (ms): 4 + Time: 1 + Value: 3 + http_route: 2 + service_name: 0 + renameByName: {} + type: table + - datasource: + type: prometheus + uid: ${datasource} + description: the slowest remote procedure calls (RPC) in your application sorted by response time percentile + fieldConfig: + defaults: + color: + mode: thresholds + custom: + align: auto + cellOptions: + type: json-view + filterable: true + inspect: false + mappings: [] + thresholds: + mode: absolute + steps: + - color: green + value: null + - color: red + value: 80 + unit: s + overrides: + - matcher: + id: byName + options: Time + properties: + - id: custom.hidden + value: true + - matcher: + id: byName + options: Value + properties: + - id: custom.hidden + value: true + - matcher: + id: byName + options: Duration (ms) + properties: + - id: decimals + value: 0 + gridPos: + h: 5 + w: 11 + 
x: 13 + "y": 0 + id: 56 + options: + cellHeight: sm + footer: + countRows: false + fields: "" + reducer: + - sum + show: false + frameIndex: 2 + showHeader: true + sortBy: + - desc: true + displayName: Duration (ms) + pluginVersion: 9.5.2 + targets: + - datasource: + type: prometheus + uid: ${datasource} + editorMode: code + expr: sort_desc(topk by(rpc_method, service_name) (5, max by (rpc_method, service_name) (histogram_quantile(0.95, (sum by(rpc_method, service_name, le) (rate({__name__=~"rpc_server_duration_seconds_bucket|rpc_server_duration_bucket",instance=~"$instance",job=~"$job"}[$__rate_interval]))))))) + format: table + hide: false + instant: true + legendFormat: '{{service_name}} - {{http_route}}' + range: false + refId: A + title: Slowest RPC methods (P95) + transformations: + - id: calculateField + options: + alias: Duration (ms) + binary: + left: Value + operator: '*' + reducer: sum + right: "1000" + mode: binary + reduce: + reducer: sum + replaceFields: false + type: table + - collapsed: false + gridPos: + h: 1 + w: 24 + x: 0 + "y": 5 + id: 72 + panels: [] + repeat: Service + repeatDirection: h + title: 'Inbound: $Service' + type: row + - datasource: + type: prometheus + uid: ${datasource} + description: average and top percentiles for both HTTP and gRPC traffic + fieldConfig: + defaults: + color: + mode: palette-classic + custom: + axisCenteredZero: false + axisColorMode: text + axisLabel: "" + axisPlacement: auto + barAlignment: 0 + drawStyle: line + fillOpacity: 63 + gradientMode: opacity + hideFrom: + legend: false + tooltip: false + viz: false + lineInterpolation: linear + lineWidth: 3 + pointSize: 5 + scaleDistribution: + type: linear + showPoints: auto + spanNulls: false + stacking: + group: A + mode: none + thresholdsStyle: + mode: "off" + mappings: [] + thresholds: + mode: absolute + steps: + - color: green + value: null + - color: red + value: 80 + unit: s + overrides: [] + gridPos: + h: 8 + w: 8 + x: 0 + "y": 6 + id: 1 + options: + legend: 
+ calcs: [] + displayMode: list + placement: bottom + showLegend: true + tooltip: + mode: single + sort: none + targets: + - datasource: + type: prometheus + uid: ${datasource} + editorMode: code + expr: histogram_quantile(0.99, sum by(service_name, le) (rate({__name__=~"http_server_request_duration_seconds_bucket|http_server_request_duration_bucket",service_name="$Service",instance=~"$instance",job=~"$job"}[$__rate_interval]))) + legendFormat: HTTP p99 + range: true + refId: A + - datasource: + type: prometheus + uid: ${datasource} + editorMode: code + expr: histogram_quantile(0.95, sum(rate({__name__=~"http_server_request_duration_seconds_bucket|http_server_request_duration_bucket",service_name="$Service",instance=~"$instance",job=~"$job"}[$__rate_interval])) by (service_name, le)) + hide: false + legendFormat: HTTP p95 + range: true + refId: B + - datasource: + type: prometheus + uid: ${datasource} + editorMode: code + expr: sum(rate({__name__=~"http_server_request_duration_seconds_sum|http_server_request_duration_sum",service_name="$Service",instance=~"$instance",job=~"$job"} [$__rate_interval])) / sum(rate({__name__=~"http_server_request_duration_seconds_count|http_server_request_duration_count",service_name="$Service",instance=~"$instance",job=~"$job"} [$__rate_interval])) + hide: false + legendFormat: HTTP Avg + range: true + refId: C + - datasource: + type: prometheus + uid: ${datasource} + editorMode: code + expr: histogram_quantile(0.99, sum by(service_name, le) (rate({__name__=~"rpc_server_duration_seconds_bucket|rpc_server_duration_bucket",service_name="$Service",instance=~"$instance",job=~"$job"}[$__rate_interval]))) + hide: false + legendFormat: RPC p99 + range: true + refId: D + - datasource: + type: prometheus + uid: ${datasource} + editorMode: code + expr: histogram_quantile(0.95, sum by(service_name, le) 
(rate({__name__=~"rpc_server_duration_seconds_bucket|rpc_server_duration_bucket",service_name="$Service",instance=~"$instance",job=~"$job"}[$__rate_interval]))) + hide: false + legendFormat: RPC p95 + range: true + refId: E + - datasource: + type: prometheus + uid: ${datasource} + editorMode: code + expr: sum(rate({__name__=~"rpc_server_duration_seconds_sum|rpc_server_duration_sum",service_name="$Service",instance=~"$instance",job=~"$job"} [$__rate_interval])) / sum(rate({__name__=~"rpc_server_duration_seconds_count|rpc_server_duration_count",service_name="$Service",instance=~"$instance",job=~"$job"} [$__rate_interval])) + hide: false + legendFormat: RPC Avg + range: true + refId: F + title: Duration + type: timeseries + - datasource: + type: prometheus + uid: ${datasource} + description: number of requests per second, faceted by its HTTP or gRPC return code + fieldConfig: + defaults: + color: + mode: palette-classic + custom: + axisCenteredZero: false + axisColorMode: text + axisLabel: "" + axisPlacement: auto + barAlignment: 0 + drawStyle: line + fillOpacity: 0 + gradientMode: none + hideFrom: + legend: false + tooltip: false + viz: false + lineInterpolation: linear + lineWidth: 1 + pointSize: 5 + scaleDistribution: + type: linear + showPoints: auto + spanNulls: false + stacking: + group: A + mode: none + thresholdsStyle: + mode: "off" + mappings: [] + thresholds: + mode: absolute + steps: + - color: green + value: null + - color: red + value: 80 + unit: reqps + overrides: [] + gridPos: + h: 8 + w: 8 + x: 8 + "y": 6 + id: 58 + options: + legend: + calcs: [] + displayMode: list + placement: bottom + showLegend: true + tooltip: + mode: single + sort: none + targets: + - datasource: + type: prometheus + uid: ${datasource} + editorMode: code + expr: sum(rate({__name__=~"http_server_request_duration_seconds_count|http_server_request_duration_count",service_name="$Service",instance=~"$instance",job=~"$job"} [$__rate_interval])) by (http_response_status_code) + hide: 
false + legendFormat: HTTP server - {{http_response_status_code}} + range: true + refId: B + - datasource: + type: prometheus + uid: ${datasource} + editorMode: code + expr: sum(rate({__name__=~"rpc_server_duration_seconds_count|rpc_server_duration_count",service_name="$Service",instance=~"$instance",job=~"$job"} [$__rate_interval])) by (service_name, rpc_grpc_status_code) + hide: false + legendFormat: RPC server (status {{rpc_grpc_status_code}}) + range: true + refId: D + title: Request rate + type: timeseries + - datasource: + type: prometheus + uid: ${datasource} + description: percentage of 4xx|5xx HTTP responses or non-zero gRPC responses over the total of the requests + fieldConfig: + defaults: + color: + mode: palette-classic + custom: + axisCenteredZero: false + axisColorMode: text + axisLabel: "" + axisPlacement: auto + axisSoftMax: 1 + barAlignment: 0 + drawStyle: line + fillOpacity: 0 + gradientMode: none + hideFrom: + legend: false + tooltip: false + viz: false + lineInterpolation: linear + lineWidth: 1 + pointSize: 5 + scaleDistribution: + type: linear + showPoints: auto + spanNulls: false + stacking: + group: A + mode: none + thresholdsStyle: + mode: "off" + mappings: [] + thresholds: + mode: absolute + steps: + - color: green + value: null + - color: red + value: 80 + unit: percentunit + overrides: [] + gridPos: + h: 8 + w: 8 + x: 16 + "y": 6 + id: 59 + options: + legend: + calcs: [] + displayMode: list + placement: bottom + showLegend: true + tooltip: + mode: single + sort: none + targets: + - datasource: + type: prometheus + uid: ${datasource} + editorMode: code + expr: sum by (http_response_status_code) (rate({__name__=~"http_server_request_duration_seconds_count|http_server_request_duration_count",service_name="${Service}",http_response_status_code=~"(4|5).*",instance=~"$instance",job=~"$job"}[$__rate_interval])) / ignoring(http_response_status_code) group_left 
sum(rate({__name__=~"http_server_request_duration_seconds_count|http_server_request_duration_count",service_name="${Service}",instance=~"$instance",job=~"$job"}[$__rate_interval])) + hide: false + legendFormat: HTTP server - {{http_response_status_code}} + range: true + refId: B + - datasource: + type: prometheus + uid: ${datasource} + editorMode: code + expr: sum by (rpc_grpc_status_code) (rate({__name__=~"rpc_server_duration_seconds_count|rpc_server_duration_count",service_name="${Service}",rpc_grpc_status_code!="0",instance=~"$instance",job=~"$job"}[$__rate_interval])) / ignoring(rpc_grpc_status_code) group_left sum(rate({__name__=~"rpc_server_duration_seconds_count|rpc_server_duration_count",service_name="${Service}",instance=~"$instance",job=~"$job"}[$__rate_interval])) + hide: false + legendFormat: RPC server (status {{rpc_grpc_status_code}}) + range: true + refId: D + title: Error rate + type: timeseries + - collapsed: false + gridPos: + h: 1 + w: 24 + x: 0 + "y": 14 + id: 5 + panels: [] + repeat: Service + repeatDirection: h + title: 'Outbound: $Service' + type: row + - datasource: + type: prometheus + uid: ${datasource} + description: average and top percentiles for both HTTP and gRPC traffic + fieldConfig: + defaults: + color: + mode: palette-classic + custom: + axisCenteredZero: false + axisColorMode: text + axisLabel: "" + axisPlacement: auto + barAlignment: 0 + drawStyle: line + fillOpacity: 63 + gradientMode: opacity + hideFrom: + legend: false + tooltip: false + viz: false + lineInterpolation: linear + lineWidth: 3 + pointSize: 5 + scaleDistribution: + type: linear + showPoints: auto + spanNulls: false + stacking: + group: A + mode: none + thresholdsStyle: + mode: "off" + mappings: [] + thresholds: + mode: absolute + steps: + - color: green + value: null + - color: red + value: 80 + unit: s + overrides: [] + gridPos: + h: 8 + w: 8 + x: 0 + "y": 15 + id: 6 + options: + legend: + calcs: [] + displayMode: list + placement: bottom + showLegend: true + 
tooltip: + mode: single + sort: none + targets: + - datasource: + type: prometheus + uid: ${datasource} + editorMode: code + expr: histogram_quantile(0.99, sum by(service_name, le) (rate({__name__=~"http_client_request_duration_seconds_bucket|http_client_request_duration_bucket",service_name="$Service",instance=~"$instance",job=~"$job"}[$__rate_interval]))) + legendFormat: HTTP p99 + range: true + refId: A + - datasource: + type: prometheus + uid: ${datasource} + editorMode: code + expr: 'histogram_quantile(0.95, sum(rate({__name__=~"http_client_request_duration_seconds_bucket|http_client_request_duration_bucket",service_name="$Service",instance=~"$instance",job=~"$job"}[$__rate_interval])) by (service_name, le)) ' + hide: false + legendFormat: HTTP p95 + range: true + refId: B + - datasource: + type: prometheus + uid: ${datasource} + editorMode: code + expr: sum(rate({__name__=~"http_client_request_duration_seconds_sum|http_client_request_duration_sum",service_name="$Service",instance=~"$instance",job=~"$job"} [$__rate_interval])) / sum(rate({__name__=~"http_client_request_duration_count|http_client_request_duration_seconds_count",service_name="$Service",instance=~"$instance",job=~"$job"} [$__rate_interval])) + hide: false + legendFormat: HTTP Avg + range: true + refId: C + - datasource: + type: prometheus + uid: ${datasource} + editorMode: code + expr: sum(rate({__name__=~"rpc_client_duration_seconds_sum|rpc_client_duration_sum",service_name="$Service",instance=~"$instance",job=~"$job"} [$__rate_interval])) / sum(rate({__name__=~"rpc_client_duration_count|rpc_client_duration_seconds_count",service_name="$Service",instance=~"$instance",job=~"$job"} [$__rate_interval])) + hide: false + legendFormat: RPC Avg + range: true + refId: D + - datasource: + type: prometheus + uid: ${datasource} + editorMode: code + expr: histogram_quantile(0.99, sum by(service_name, le) 
(rate({__name__=~"rpc_client_duration_seconds_bucket|rpc_client_duration_bucket",service_name="$Service",instance=~"$instance",job=~"$job"}[$__rate_interval]))) + hide: false + legendFormat: RPC p99 + range: true + refId: E + - datasource: + type: prometheus + uid: ${datasource} + editorMode: code + expr: 'histogram_quantile(0.95, sum(rate({__name__=~"rpc_client_duration_seconds_bucket|rpc_client_duration_bucket",service_name="$Service",instance=~"$instance",job=~"$job"}[$__rate_interval])) by (service_name, le)) ' + hide: false + legendFormat: RPC p95 + range: true + refId: F + title: Duration + type: timeseries + - datasource: + type: prometheus + uid: ${datasource} + description: number of requests per second, faceted by its HTTP or gRPC return code + fieldConfig: + defaults: + color: + mode: palette-classic + custom: + axisCenteredZero: false + axisColorMode: text + axisLabel: "" + axisPlacement: auto + barAlignment: 0 + drawStyle: line + fillOpacity: 0 + gradientMode: none + hideFrom: + legend: false + tooltip: false + viz: false + lineInterpolation: linear + lineWidth: 1 + pointSize: 5 + scaleDistribution: + type: linear + showPoints: auto + spanNulls: false + stacking: + group: A + mode: none + thresholdsStyle: + mode: "off" + mappings: [] + thresholds: + mode: absolute + steps: + - color: green + value: null + - color: red + value: 80 + unit: reqps + overrides: [] + gridPos: + h: 8 + w: 8 + x: 8 + "y": 15 + id: 8 + options: + legend: + calcs: [] + displayMode: list + placement: bottom + showLegend: true + tooltip: + mode: single + sort: none + targets: + - datasource: + type: prometheus + uid: ${datasource} + editorMode: code + expr: sum(rate({__name__=~"http_client_request_duration_count|http_client_request_duration_seconds_count",service_name="$Service",instance=~"$instance",job=~"$job"} [$__rate_interval])) by (service_name, http_response_status_code) + legendFormat: HTTP client - {{http_response_status_code}} + range: true + refId: A + - datasource: + 
type: prometheus + uid: ${datasource} + editorMode: code + expr: sum(rate({__name__=~"rpc_client_duration_count|rpc_client_duration_seconds_count",service_name="$Service",instance=~"$instance",job=~"$job"} [$__rate_interval])) by (service_name, rpc_grpc_status_code) + hide: false + legendFormat: RPC client (status {{rpc_grpc_status_code}}) + range: true + refId: C + title: Rate + type: timeseries + - datasource: + type: prometheus + uid: ${datasource} + description: percentage of 5xx HTTP responses or non-zero gRPC responses over the total of the requests + fieldConfig: + defaults: + color: + mode: palette-classic + custom: + axisCenteredZero: false + axisColorMode: text + axisLabel: "" + axisPlacement: auto + axisSoftMax: 1 + barAlignment: 0 + drawStyle: line + fillOpacity: 0 + gradientMode: none + hideFrom: + legend: false + tooltip: false + viz: false + lineInterpolation: linear + lineWidth: 1 + pointSize: 5 + scaleDistribution: + type: linear + showPoints: auto + spanNulls: false + stacking: + group: A + mode: none + thresholdsStyle: + mode: "off" + mappings: [] + thresholds: + mode: absolute + steps: + - color: green + value: null + - color: red + value: 80 + unit: percentunit + overrides: + - __systemRef: hideSeriesFrom + matcher: + id: byNames + options: + mode: exclude + names: + - HTTP server - 500 + prefix: 'All except:' + readOnly: true + properties: + - id: custom.hideFrom + value: + legend: false + tooltip: false + viz: false + gridPos: + h: 8 + w: 8 + x: 16 + "y": 15 + id: 73 + options: + legend: + calcs: [] + displayMode: list + placement: bottom + showLegend: true + tooltip: + mode: single + sort: none + targets: + - datasource: + type: prometheus + uid: ${datasource} + editorMode: code + expr: sum by (http_response_status_code) (rate({__name__=~"http_client_request_duration_count|http_client_request_duration_seconds_count",service_name="$Service",http_response_status_code=~"5.*",instance=~"$instance",job=~"$job"}[$__rate_interval])) / 
ignoring(http_response_status_code) group_left sum(rate({__name__=~"http_client_request_duration_count|http_client_request_duration_seconds_count",service_name="$Service",instance=~"$instance",job=~"$job"}[$__rate_interval])) + hide: false + legendFormat: HTTP client - {{http_response_status_code}} + range: true + refId: B + - datasource: + type: prometheus + uid: ${datasource} + editorMode: code + expr: sum by (rpc_grpc_status_code) (rate({__name__=~"rpc_client_duration_count|rpc_client_duration_seconds_count",service_name="$Service",rpc_grpc_status_code!="0",instance=~"$instance",job=~"$job"}[$__rate_interval])) / ignoring(rpc_grpc_status_code) group_left sum(rate({__name__=~"rpc_client_duration_count|rpc_client_duration_seconds_count",service_name="$Service",instance=~"$instance",job=~"$job"}[$__rate_interval])) + hide: false + legendFormat: RPC client (status {{rpc_grpc_status_code}}) + range: true + refId: D + title: Error rate + type: timeseries + refresh: 30s + schemaVersion: 38 + tags: + - ebpf + - red + - http + - grpc + - https + - beyla + templating: + list: + - current: + selected: false + text: Prometheus + value: Prometheus + description: Source of the metrics (e.g. 
Prometheus source) + hide: 0 + includeAll: false + label: Data source + multi: false + name: datasource + options: [] + query: prometheus + queryValue: "" + refresh: 1 + regex: "" + skipUrlSync: false + type: datasource + - current: + selected: true + text: + - All + value: + - $__all + datasource: + type: prometheus + uid: ${datasource} + definition: label_values(service_name) + hide: 0 + includeAll: true + label: Service + multi: true + name: Service + options: [] + query: + query: label_values(service_name) + refId: PrometheusVariableQueryEditor-VariableQuery + refresh: 2 + regex: "" + skipUrlSync: false + sort: 5 + type: query + - allValue: .+ + current: + selected: true + text: + - All + value: + - $__all + datasource: + type: prometheus + uid: ${datasource} + definition: label_values(job) + hide: 0 + includeAll: true + label: Job + multi: true + name: job + options: [] + query: + query: label_values(job) + refId: PrometheusVariableQueryEditor-VariableQuery + refresh: 2 + regex: "" + skipUrlSync: false + sort: 0 + type: query + - allValue: .+ + current: + selected: true + text: + - All + value: + - $__all + datasource: + type: prometheus + uid: ${datasource} + definition: label_values(instance) + hide: 0 + includeAll: true + label: Instance + multi: true + name: instance + options: [] + query: + query: label_values(instance) + refId: PrometheusVariableQueryEditor-VariableQuery + refresh: 2 + regex: "" + skipUrlSync: false + sort: 0 + type: query + - allValue: .+ + datasource: + type: prometheus + uid: ${datasource} + includeAll: true + label: Job + multi: true + name: job + query: label_values(up{job!="kubelet"}, job) + refresh: 2 + sort: 1 + type: query + - allValue: .+ + datasource: + type: prometheus + uid: ${datasource} + includeAll: true + label: Instance + multi: true + name: instance + query: label_values(up{job=~"$job"}, instance) + refresh: 2 + sort: 1 + type: query + time: + from: now-30m + to: now + timepicker: {} + timezone: "" + title: Beyla RED 
Metrics + uid: 422313feb73f77366e56faa7870b77ec + version: 3 + weekStart: "" + application_process.json: + __elements: {} + __inputs: + - description: "" + label: grafanacloud-prom + name: DS_GRAFANACLOUD_PROM + pluginId: prometheus + pluginName: Prometheus + type: datasource + __requires: + - id: grafana + name: Grafana + type: grafana + version: 11.3.0-75696 + - id: prometheus + name: Prometheus + type: datasource + version: 1.0.0 + - id: timeseries + name: Time series + type: panel + version: "" + annotations: + list: + - builtIn: 1 + datasource: + type: grafana + uid: -- Grafana -- + enable: true + hide: true + iconColor: rgba(0, 211, 255, 1) + name: Annotations & Alerts + type: dashboard + description: Information about the processes belonging to the applications instrumented by Beyla + editable: false + fiscalYearStartMonth: 0 + graphTooltip: 0 + id: null + links: [] + panels: + - datasource: + type: prometheus + uid: ${datasource} + description: CPU utilization percentage for each instrumented process + fieldConfig: + defaults: + color: + mode: palette-classic + custom: + axisBorderShow: false + axisCenteredZero: false + axisColorMode: text + axisLabel: "" + axisPlacement: auto + barAlignment: 0 + barWidthFactor: 0.6 + drawStyle: line + fillOpacity: 0 + gradientMode: none + hideFrom: + legend: false + tooltip: false + viz: false + insertNulls: false + lineInterpolation: linear + lineWidth: 1 + pointSize: 5 + scaleDistribution: + type: linear + showPoints: auto + spanNulls: false + stacking: + group: A + mode: none + thresholdsStyle: + mode: "off" + mappings: [] + thresholds: + mode: absolute + steps: + - color: green + value: null + - color: red + value: 80 + unit: percentunit + overrides: [] + gridPos: + h: 8 + w: 24 + x: 0 + "y": 0 + id: 2 + options: + legend: + calcs: [] + displayMode: list + placement: bottom + showLegend: true + tooltip: + mode: single + sort: none + pluginVersion: 11.3.0-75696 + targets: + - datasource: + type: prometheus + uid: 
${datasource} + editorMode: code + expr: sum(rate({__name__=~"process_cpu_time.*",process_pid="",job=~"$job",instance=~"$instance"}[$__rate_interval]) + on(instance) group_left(k8s_pod_name,k8s_namespace_name,process_command,process_pid) (0 * target_info{process_pid!="",job=~"$job",instance=~"$instance"})) by(k8s_pod_name,k8s_namespace_name,process_command,process_pid) + legendFormat: '{{process_command}}/{{process_pid}} (Pod: {{k8s_namespace_name}}/{{k8s_pod_name}})' + range: true + refId: A + - datasource: + type: prometheus + uid: ${datasource} + editorMode: code + expr: sum(rate({__name__=~"process_cpu_time.*",process_pid!="",job=~"$job",instance=~"$instance"}[$__rate_interval])) by(k8s_pod_name,k8s_namespace_name,process_command,process_pid) + hide: false + instant: false + legendFormat: '{{process_command}}/{{process_pid}} (Pod: {{k8s_namespace_name}}/{{k8s_pod_name}})' + range: true + refId: B + title: CPU percent + type: timeseries + - datasource: + type: prometheus + uid: ${datasource} + description: Physical memory usage for each instrumented process + fieldConfig: + defaults: + color: + mode: palette-classic + custom: + axisBorderShow: false + axisCenteredZero: false + axisColorMode: text + axisLabel: "" + axisPlacement: auto + barAlignment: 0 + barWidthFactor: 0.6 + drawStyle: line + fillOpacity: 0 + gradientMode: none + hideFrom: + legend: false + tooltip: false + viz: false + insertNulls: false + lineInterpolation: linear + lineWidth: 1 + pointSize: 5 + scaleDistribution: + type: linear + showPoints: auto + spanNulls: false + stacking: + group: A + mode: none + thresholdsStyle: + mode: "off" + mappings: [] + thresholds: + mode: absolute + steps: + - color: green + value: null + - color: red + value: 80 + unit: bytes + overrides: [] + gridPos: + h: 8 + w: 12 + x: 0 + "y": 8 + id: 3 + options: + legend: + calcs: [] + displayMode: list + placement: bottom + showLegend: true + tooltip: + mode: single + sort: none + pluginVersion: 11.3.0-75696 + targets: + 
- datasource: + type: prometheus + uid: ${datasource} + editorMode: code + expr: sum({__name__=~"process_memory_usage.*",process_pid="",job=~"$job",instance=~"$instance"} + on(instance) group_left(k8s_pod_name,k8s_namespace_name,process_command,process_pid) (0 * target_info{process_pid!="",job=~"$job",instance=~"$instance"})) by(k8s_pod_name,k8s_namespace_name,process_command,process_pid) + legendFormat: '{{process_command}}/{{process_pid}} (Pod: {{k8s_namespace_name}}/{{k8s_pod_name}})' + range: true + refId: A + - datasource: + type: prometheus + uid: ${datasource} + editorMode: code + expr: sum({__name__=~"process_memory_usage.*",process_pid!="",job=~"$job",instance=~"$instance"}) by(k8s_pod_name,k8s_namespace_name,process_command,process_pid) + hide: false + legendFormat: '{{process_command}}/{{process_pid}} (Pod: {{k8s_namespace_name}}/{{k8s_pod_name}})' + range: true + refId: B + title: Physical memory + type: timeseries + - datasource: + type: prometheus + uid: ${datasource} + description: Virtual memory usage for each instrumented process + fieldConfig: + defaults: + color: + mode: palette-classic + custom: + axisBorderShow: false + axisCenteredZero: false + axisColorMode: text + axisLabel: "" + axisPlacement: auto + barAlignment: 0 + barWidthFactor: 0.6 + drawStyle: line + fillOpacity: 0 + gradientMode: none + hideFrom: + legend: false + tooltip: false + viz: false + insertNulls: false + lineInterpolation: linear + lineWidth: 1 + pointSize: 5 + scaleDistribution: + type: linear + showPoints: auto + spanNulls: false + stacking: + group: A + mode: none + thresholdsStyle: + mode: "off" + mappings: [] + thresholds: + mode: absolute + steps: + - color: green + value: null + - color: red + value: 80 + unit: bytes + overrides: [] + gridPos: + h: 8 + w: 12 + x: 12 + "y": 8 + id: 4 + options: + legend: + calcs: [] + displayMode: list + placement: bottom + showLegend: true + tooltip: + mode: single + sort: none + pluginVersion: 11.3.0-75696 + targets: + - 
datasource: + type: prometheus + uid: ${datasource} + editorMode: code + expr: sum({__name__=~"process_memory_virtual.*",process_pid="",job=~"$job",instance=~"$instance"} + on(instance) group_left(k8s_pod_name,k8s_namespace_name,process_command,process_pid) (0 * target_info{process_pid!="",job=~"$job",instance=~"$instance"})) by(k8s_pod_name,k8s_namespace_name,process_command,process_pid) + legendFormat: '{{process_command}}/{{process_pid}} (Pod: {{k8s_namespace_name}}/{{k8s_pod_name}})' + range: true + refId: A + - datasource: + type: prometheus + uid: ${datasource} + editorMode: code + expr: sum({__name__=~"process_memory_virtual.*",process_pid!="",job=~"$job",instance=~"$instance"}) by(k8s_pod_name,k8s_namespace_name,process_command,process_pid) + hide: false + legendFormat: '{{process_command}}/{{process_pid}} (Pod: {{k8s_namespace_name}}/{{k8s_pod_name}})' + range: true + refId: B + title: Virtual memory + type: timeseries + - datasource: + type: prometheus + uid: ${datasource} + description: Disk read operations rate for each instrumented process + fieldConfig: + defaults: + color: + mode: palette-classic + custom: + axisBorderShow: false + axisCenteredZero: false + axisColorMode: text + axisLabel: "" + axisPlacement: auto + barAlignment: 0 + barWidthFactor: 0.6 + drawStyle: line + fillOpacity: 0 + gradientMode: none + hideFrom: + legend: false + tooltip: false + viz: false + insertNulls: false + lineInterpolation: linear + lineWidth: 1 + pointSize: 5 + scaleDistribution: + type: linear + showPoints: auto + spanNulls: false + stacking: + group: A + mode: none + thresholdsStyle: + mode: "off" + mappings: [] + thresholds: + mode: absolute + steps: + - color: green + value: null + - color: red + value: 80 + unit: binBps + overrides: [] + gridPos: + h: 8 + w: 12 + x: 0 + "y": 16 + id: 5 + options: + legend: + calcs: [] + displayMode: list + placement: bottom + showLegend: true + tooltip: + mode: single + sort: none + pluginVersion: 11.3.0-75696 + targets: + - 
datasource: + type: prometheus + uid: ${datasource} + editorMode: code + expr: sum(rate({__name__=~"process_disk_io.*",process_pid="",disk_io_direction="read",job=~"$job",instance=~"$instance"}[$__rate_interval]) + on(instance) group_left(k8s_pod_name,k8s_namespace_name,process_command,process_pid) (0 * target_info{process_pid!="",job=~"$job",instance=~"$instance"})) by(k8s_pod_name,k8s_namespace_name,process_command,process_pid) + legendFormat: '{{process_command}}/{{process_pid}} (Pod: {{k8s_namespace_name}}/{{k8s_pod_name}})' + range: true + refId: A + - datasource: + type: prometheus + uid: ${datasource} + editorMode: code + expr: sum(rate({__name__=~"process_disk_io.*",process_pid!="",disk_io_direction="read",job=~"$job",instance=~"$instance"}[$__rate_interval])) by(k8s_pod_name,k8s_namespace_name,process_command,process_pid) + hide: false + legendFormat: '{{process_command}}/{{process_pid}} (Pod: {{k8s_namespace_name}}/{{k8s_pod_name}})' + range: true + refId: B + title: Disk I/O read + type: timeseries + - datasource: + type: prometheus + uid: ${datasource} + description: Disk write operations rate for each instrumented process + fieldConfig: + defaults: + color: + mode: palette-classic + custom: + axisBorderShow: false + axisCenteredZero: false + axisColorMode: text + axisLabel: "" + axisPlacement: auto + barAlignment: 0 + barWidthFactor: 0.6 + drawStyle: line + fillOpacity: 0 + gradientMode: none + hideFrom: + legend: false + tooltip: false + viz: false + insertNulls: false + lineInterpolation: linear + lineWidth: 1 + pointSize: 5 + scaleDistribution: + type: linear + showPoints: auto + spanNulls: false + stacking: + group: A + mode: none + thresholdsStyle: + mode: "off" + mappings: [] + thresholds: + mode: absolute + steps: + - color: green + value: null + - color: red + value: 80 + unit: binBps + overrides: [] + gridPos: + h: 8 + w: 12 + x: 12 + "y": 16 + id: 7 + options: + legend: + calcs: [] + displayMode: list + placement: bottom + showLegend: true + 
tooltip: + mode: single + sort: none + pluginVersion: 11.3.0-75696 + targets: + - datasource: + type: prometheus + uid: ${datasource} + editorMode: code + expr: sum(rate({__name__=~"process_disk_io.*",process_pid="",disk_io_direction="write",job=~"$job",instance=~"$instance"}[$__rate_interval]) + on(instance) group_left(k8s_pod_name,k8s_namespace_name,process_command,process_pid) (0 * target_info{process_pid!="",job=~"$job",instance=~"$instance"})) by(k8s_pod_name,k8s_namespace_name,process_command,process_pid) + legendFormat: '{{process_command}}/{{process_pid}} (Pod: {{k8s_namespace_name}}/{{k8s_pod_name}})' + range: true + refId: A + - datasource: + type: prometheus + uid: ${datasource} + editorMode: code + expr: sum(rate({__name__=~"process_disk_io.*",process_pid!="",disk_io_direction="write",job=~"$job",instance=~"$instance"}[$__rate_interval])) by(k8s_pod_name,k8s_namespace_name,process_command,process_pid) + hide: false + legendFormat: '{{process_command}}/{{process_pid}} (Pod: {{k8s_namespace_name}}/{{k8s_pod_name}})' + range: true + refId: B + title: Disk I/O write + type: timeseries + - datasource: + type: prometheus + uid: ${datasource} + description: Network receive throughput for each instrumented process + fieldConfig: + defaults: + color: + mode: palette-classic + custom: + axisBorderShow: false + axisCenteredZero: false + axisColorMode: text + axisLabel: "" + axisPlacement: auto + barAlignment: 0 + barWidthFactor: 0.6 + drawStyle: line + fillOpacity: 0 + gradientMode: none + hideFrom: + legend: false + tooltip: false + viz: false + insertNulls: false + lineInterpolation: linear + lineWidth: 1 + pointSize: 5 + scaleDistribution: + type: linear + showPoints: auto + spanNulls: false + stacking: + group: A + mode: none + thresholdsStyle: + mode: "off" + mappings: [] + thresholds: + mode: absolute + steps: + - color: green + value: null + - color: red + value: 80 + unit: binBps + overrides: [] + gridPos: + h: 8 + w: 12 + x: 0 + "y": 24 + id: 6 + options: 
+ legend: + calcs: [] + displayMode: list + placement: bottom + showLegend: true + tooltip: + mode: single + sort: none + pluginVersion: 11.3.0-75696 + targets: + - datasource: + type: prometheus + uid: ${datasource} + editorMode: code + expr: sum(rate({__name__=~"process_network_io.*",process_pid="",network_io_direction="receive",job=~"$job",instance=~"$instance"}[$__rate_interval]) + on(instance) group_left(k8s_pod_name,k8s_namespace_name,process_command,process_pid) (0 * target_info{process_pid!="",job=~"$job",instance=~"$instance"})) by(k8s_pod_name,k8s_namespace_name,process_command,process_pid) + legendFormat: '{{process_command}}/{{process_pid}} (Pod: {{k8s_namespace_name}}/{{k8s_pod_name}})' + range: true + refId: A + - datasource: + type: prometheus + uid: ${datasource} + editorMode: code + expr: sum(rate({__name__=~"process_network_io.*",process_pid!="",network_io_direction="receive",job=~"$job",instance=~"$instance"}[$__rate_interval])) by(k8s_pod_name,k8s_namespace_name,process_command,process_pid) + hide: false + legendFormat: '{{process_command}}/{{process_pid}} (Pod: {{k8s_namespace_name}}/{{k8s_pod_name}})' + range: true + refId: B + title: Network I/O receive + type: timeseries + - datasource: + type: prometheus + uid: ${datasource} + description: Network transmit throughput for each instrumented process + fieldConfig: + defaults: + color: + mode: palette-classic + custom: + axisBorderShow: false + axisCenteredZero: false + axisColorMode: text + axisLabel: "" + axisPlacement: auto + barAlignment: 0 + barWidthFactor: 0.6 + drawStyle: line + fillOpacity: 0 + gradientMode: none + hideFrom: + legend: false + tooltip: false + viz: false + insertNulls: false + lineInterpolation: linear + lineWidth: 1 + pointSize: 5 + scaleDistribution: + type: linear + showPoints: auto + spanNulls: false + stacking: + group: A + mode: none + thresholdsStyle: + mode: "off" + mappings: [] + thresholds: + mode: absolute + steps: + - color: green + value: null + - color: red 
+ value: 80 + unit: binBps + overrides: [] + gridPos: + h: 8 + w: 12 + x: 12 + "y": 24 + id: 8 + options: + legend: + calcs: [] + displayMode: list + placement: bottom + showLegend: true + tooltip: + mode: single + sort: none + pluginVersion: 11.3.0-75696 + targets: + - datasource: + type: prometheus + uid: ${datasource} + editorMode: code + expr: sum(rate({__name__=~"process_network_io.*",process_pid="",network_io_direction="transmit",job=~"$job",instance=~"$instance"}[$__rate_interval]) + on(instance) group_left(k8s_pod_name,k8s_namespace_name,process_command,process_pid) (0 * target_info{process_pid!="",job=~"$job",instance=~"$instance"})) by(k8s_pod_name,k8s_namespace_name,process_command,process_pid) + legendFormat: '{{process_command}}/{{process_pid}} (Pod: {{k8s_namespace_name}}/{{k8s_pod_name}})' + range: true + refId: A + - datasource: + type: prometheus + uid: ${datasource} + editorMode: code + expr: sum(rate({__name__=~"process_network_io.*",process_pid!="",network_io_direction="transmit",job=~"$job",instance=~"$instance"}[$__rate_interval])) by(k8s_pod_name,k8s_namespace_name,process_command,process_pid) + hide: false + legendFormat: '{{process_command}}/{{process_pid}} (Pod: {{k8s_namespace_name}}/{{k8s_pod_name}})' + range: true + refId: B + title: Network I/O transmit + type: timeseries + schemaVersion: 39 + tags: + - process + - beyla + templating: + list: + - label: Data source + name: datasource + query: prometheus + regex: "" + type: datasource + - allValue: .+ + datasource: + type: prometheus + uid: ${datasource} + includeAll: true + label: Job + multi: true + name: job + query: label_values(up{job!="kubelet"}, job) + refresh: 2 + sort: 1 + type: query + - allValue: .+ + datasource: + type: prometheus + uid: ${datasource} + includeAll: true + label: Instance + multi: true + name: instance + query: label_values(up{job=~"$job"}, instance) + refresh: 2 + sort: 1 + type: query + time: + from: now-15m + to: now + timepicker: {} + timezone: browser + 
title: Beyla process metrics + uid: a0d490796b9cf61d5e83aa79cb3463ee + version: 40 + weekStart: "" + beyla_debug.json: + annotations: + list: + - builtIn: 1 + datasource: + type: grafana + uid: -- Grafana -- + enable: true + hide: true + iconColor: rgba(0, 211, 255, 1) + name: Annotations & Alerts + type: dashboard + editable: false + fiscalYearStartMonth: 0 + graphTooltip: 0 + id: 72 + links: [] + panels: + - collapsed: false + gridPos: + h: 1 + w: 24 + x: 0 + "y": 0 + id: 12 + panels: [] + title: Overview + type: row + - datasource: + type: prometheus + uid: ${datasource} + description: Current version of Beyla running in the cluster + fieldConfig: + defaults: + color: + mode: thresholds + mappings: [] + thresholds: + mode: absolute + steps: + - color: green + value: null + - color: red + value: 80 + overrides: + - matcher: + id: byName + options: "" + properties: [] + gridPos: + h: 3 + w: 3 + x: 0 + "y": 1 + id: 2 + options: + colorMode: value + graphMode: area + justifyMode: auto + orientation: auto + percentChangeColorMode: standard + reduceOptions: + calcs: + - lastNotNull + fields: /^version$/ + values: false + showPercentChange: false + textMode: auto + wideLayout: true + pluginVersion: 11.3.0-74868 + targets: + - datasource: + type: prometheus + uid: ${datasource} + editorMode: code + expr: beyla_internal_build_info{job=~"$job",instance=~"$instance"} + format: table + instant: true + legendFormat: __auto + range: false + refId: A + title: Beyla Version + type: stat + - datasource: + type: prometheus + uid: ${datasource} + description: Number of Beyla pods currently running in the cluster + fieldConfig: + defaults: + color: + mode: thresholds + mappings: [] + thresholds: + mode: absolute + steps: + - color: green + value: null + - color: red + value: 80 + overrides: [] + gridPos: + h: 3 + w: 3 + x: 3 + "y": 1 + id: 5 + options: + colorMode: value + graphMode: area + justifyMode: auto + orientation: auto + percentChangeColorMode: standard + reduceOptions: + 
calcs: + - lastNotNull + fields: "" + values: false + showPercentChange: false + textMode: auto + wideLayout: true + pluginVersion: 11.3.0-74868 + targets: + - datasource: + type: prometheus + uid: ${datasource} + editorMode: code + expr: kube_daemonset_status_number_ready{daemonset="beyla",job=~"$job",instance=~"$instance"} + instant: false + legendFormat: __auto + range: true + refId: A + title: 'DaemonSet: Running pods' + type: stat + - datasource: + type: prometheus + uid: ${datasource} + description: Total number of OpenTelemetry metrics exported by Beyla + fieldConfig: + defaults: + color: + mode: thresholds + mappings: [] + thresholds: + mode: absolute + steps: + - color: green + value: null + - color: red + value: 80 + overrides: [] + gridPos: + h: 3 + w: 3 + x: 6 + "y": 1 + id: 3 + options: + colorMode: value + graphMode: area + justifyMode: auto + orientation: auto + percentChangeColorMode: standard + reduceOptions: + calcs: + - lastNotNull + fields: "" + values: false + showPercentChange: false + textMode: auto + wideLayout: true + pluginVersion: 11.3.0-74868 + targets: + - datasource: + type: prometheus + uid: ${datasource} + disableTextWrap: false + editorMode: builder + exemplar: false + expr: sum by(cluster) (beyla_otel_metric_exports_total{job=~"$job",instance=~"$instance"}) + fullMetaSearch: false + includeNullMetadata: true + instant: false + legendFormat: __auto + range: true + refId: A + useBackend: false + title: OTEL metric exports + type: stat + - datasource: + type: prometheus + uid: ${datasource} + description: Total number of OpenTelemetry traces exported by Beyla + fieldConfig: + defaults: + color: + mode: thresholds + mappings: [] + thresholds: + mode: absolute + steps: + - color: green + value: null + - color: red + value: 80 + overrides: [] + gridPos: + h: 3 + w: 3 + x: 9 + "y": 1 + id: 4 + options: + colorMode: value + graphMode: area + justifyMode: auto + orientation: auto + percentChangeColorMode: standard + reduceOptions: + calcs: 
+ - lastNotNull + fields: "" + values: false + showPercentChange: false + textMode: auto + wideLayout: true + pluginVersion: 11.3.0-74868 + targets: + - datasource: + type: prometheus + uid: ${datasource} + editorMode: code + exemplar: false + expr: sum by(cluster) (beyla_otel_trace_exports_total{job=~"$job",instance=~"$instance"}) + instant: false + legendFormat: __auto + range: true + refId: A + title: OTEL trace exports + type: stat + - datasource: + type: prometheus + uid: ${datasource} + description: List of all processes discovered and instrumented by Beyla + fieldConfig: + defaults: + color: + mode: thresholds + custom: + align: auto + cellOptions: + type: auto + inspect: false + mappings: [] + thresholds: + mode: absolute + steps: + - color: green + value: null + - color: red + value: 80 + overrides: + - matcher: + id: byRegexp + options: ^(?!.*process|Value).*$ + properties: + - id: custom.hidden + value: true + gridPos: + h: 17 + w: 8 + x: 0 + "y": 4 + id: 1 + options: + cellHeight: sm + footer: + countRows: false + fields: "" + reducer: + - sum + show: false + showHeader: true + sortBy: + - desc: false + displayName: process_name + pluginVersion: 11.3.0-74868 + targets: + - datasource: + type: prometheus + uid: ${datasource} + disableTextWrap: false + editorMode: builder + exemplar: false + expr: sum by(process_name) (beyla_instrumented_processes{job=~"$job",instance=~"$instance"}) + format: table + fullMetaSearch: false + includeNullMetadata: true + instant: true + interval: "" + legendFormat: __auto + range: false + refId: A + useBackend: false + title: Discovered Processes + type: table + - datasource: + type: prometheus + uid: ${datasource} + description: Memory usage of Beyla pods over time + fieldConfig: + defaults: + color: + mode: palette-classic + custom: + axisBorderShow: false + axisCenteredZero: false + axisColorMode: text + axisLabel: "" + axisPlacement: auto + barAlignment: 0 + barWidthFactor: 0.6 + drawStyle: line + fillOpacity: 0 + 
gradientMode: none + hideFrom: + legend: false + tooltip: false + viz: false + insertNulls: false + lineInterpolation: linear + lineWidth: 1 + pointSize: 5 + scaleDistribution: + type: linear + showPoints: auto + spanNulls: false + stacking: + group: A + mode: none + thresholdsStyle: + mode: "off" + mappings: [] + thresholds: + mode: absolute + steps: + - color: green + value: null + - color: red + value: 80 + unit: decbytes + overrides: [] + gridPos: + h: 8 + w: 16 + x: 8 + "y": 4 + id: 8 + options: + legend: + calcs: [] + displayMode: list + placement: bottom + showLegend: true + tooltip: + mode: single + sort: none + pluginVersion: 11.3.0-74868 + targets: + - datasource: + type: prometheus + uid: ${datasource} + disableTextWrap: false + editorMode: builder + exemplar: false + expr: process_memory_usage_bytes{job="default/beyla",job=~"$job",instance=~"$instance"} + format: time_series + fullMetaSearch: false + hide: false + includeNullMetadata: true + instant: false + interval: "" + legendFormat: '{{k8s_pod_name}}' + range: true + refId: A + useBackend: false + title: Memory Usage + type: timeseries + - datasource: + type: prometheus + uid: ${datasource} + description: CPU utilization of Beyla processes over time + fieldConfig: + defaults: + color: + mode: palette-classic + custom: + axisBorderShow: false + axisCenteredZero: false + axisColorMode: text + axisLabel: "" + axisPlacement: auto + barAlignment: 0 + barWidthFactor: 0.6 + drawStyle: line + fillOpacity: 0 + gradientMode: none + hideFrom: + legend: false + tooltip: false + viz: false + insertNulls: false + lineInterpolation: linear + lineWidth: 1 + pointSize: 5 + scaleDistribution: + type: linear + showPoints: auto + spanNulls: false + stacking: + group: A + mode: none + thresholdsStyle: + mode: "off" + mappings: [] + thresholds: + mode: absolute + steps: + - color: green + value: null + - color: red + value: 80 + unit: percentunit + overrides: [] + gridPos: + h: 9 + w: 16 + x: 8 + "y": 12 + id: 9 + 
options: + legend: + calcs: [] + displayMode: list + placement: bottom + showLegend: true + tooltip: + mode: single + sort: none + pluginVersion: 11.3.0-74868 + targets: + - datasource: + type: prometheus + uid: ${datasource} + disableTextWrap: false + editorMode: builder + exemplar: false + expr: sum by(process_command) (process_cpu_utilization_ratio{job="default/beyla",job=~"$job",instance=~"$instance"}) + format: time_series + fullMetaSearch: false + hide: false + includeNullMetadata: true + instant: false + interval: "" + legendFormat: '{{process_command}}' + range: true + refId: A + useBackend: false + title: CPU Usage + type: timeseries + - collapsed: false + gridPos: + h: 1 + w: 24 + x: 0 + "y": 21 + id: 11 + panels: [] + title: Logs + type: row + - datasource: + type: loki + uid: grafanacloud-logs + fieldConfig: + defaults: {} + overrides: [] + gridPos: + h: 9 + w: 24 + x: 0 + "y": 22 + id: 6 + options: + dedupStrategy: none + enableLogDetails: true + prettifyLogMessage: false + showCommonLabels: false + showLabels: false + showTime: false + sortOrder: Descending + wrapLogMessage: false + pluginVersion: 11.3.0-74868 + targets: + - datasource: + type: loki + uid: grafanacloud-logs + editorMode: code + expr: '{job="default/beyla"} | logfmt | component!=`BPFLogger`' + queryType: range + refId: A + title: Application Logs + type: logs + - datasource: + type: loki + uid: grafanacloud-logs + fieldConfig: + defaults: {} + overrides: [] + gridPos: + h: 9 + w: 24 + x: 0 + "y": 31 + id: 10 + options: + dedupStrategy: none + enableLogDetails: true + prettifyLogMessage: false + showCommonLabels: false + showLabels: false + showTime: false + sortOrder: Descending + wrapLogMessage: false + pluginVersion: 11.2.0-73179 + targets: + - datasource: + type: loki + uid: grafanacloud-logs + editorMode: code + expr: '{job="default/beyla"} | logfmt | component=`BPFLogger`' + queryType: range + refId: A + title: BPF Logs + type: logs + - datasource: + type: loki + uid: 
grafanacloud-logs + fieldConfig: + defaults: {} + overrides: [] + gridPos: + h: 5 + w: 24 + x: 0 + "y": 40 + id: 7 + options: + dedupStrategy: none + enableLogDetails: true + prettifyLogMessage: false + showCommonLabels: false + showLabels: false + showTime: false + sortOrder: Descending + wrapLogMessage: false + pluginVersion: 11.2.0-73179 + targets: + - datasource: + type: loki + uid: grafanacloud-logs + editorMode: code + expr: '{job="default/beyla"} |= `ERROR`' + queryType: range + refId: A + title: Application Errors + type: logs + preload: false + refresh: 10s + schemaVersion: 39 + tags: + - beyla + - ebpf + templating: + list: + - label: Data source + name: datasource + query: prometheus + regex: "" + type: datasource + - allValue: .+ + datasource: + type: prometheus + uid: ${datasource} + includeAll: true + label: Job + multi: true + name: job + query: label_values(up{job!="kubelet"}, job) + refresh: 2 + sort: 1 + type: query + - allValue: .+ + datasource: + type: prometheus + uid: ${datasource} + includeAll: true + label: Instance + multi: true + name: instance + query: label_values(up{job=~"$job"}, instance) + refresh: 2 + sort: 1 + type: query + - baseFilters: [] + datasource: + type: loki + uid: grafanacloud-logs + filters: [] + name: Filters + type: adhoc + time: + from: now-6h + to: now + timepicker: {} + timezone: browser + title: Beyla + uid: b0d6558d68f5f420831661892b6a2207 + version: 19 + weekStart: "" +prometheus: + - apiVersion: grizzly.grafana.com/v1alpha1 + kind: PrometheusRuleGroup + metadata: + name: beyla_internal_alerts + namespace: grizzly_rules + spec: + rules: [] +prometheusAlerts: + groups: + - name: beyla_internal_alerts + rules: [] +syntheticMonitoringChecks: {} +