From fc7f7ab582bf33736b1cd8c63a2e0d3ca562d202 Mon Sep 17 00:00:00 2001
From: Clement Erena
Date: Fri, 5 Apr 2024 18:11:19 +0200
Subject: [PATCH 1/2] feat(dashboard): add dashboard for core node components health

---
Review notes:
- "Uptime" panel: the axis options set Max twice (1, then 0) and never set a
  lower bound; the second call is now axis.Min(0).
- NOTE(review): blob ids on the "index" lines were not regenerated after this
  edit, and Go indentation was restored gofmt-style; confirm before applying.

 .../core-node-components/component.go         | 172 ++++++++++++++++++
 .../core-node-components/platform.go          |  52 ++++++
 2 files changed, 224 insertions(+)
 create mode 100644 dashboard-lib/core-node-components/component.go
 create mode 100644 dashboard-lib/core-node-components/platform.go

diff --git a/dashboard-lib/core-node-components/component.go b/dashboard-lib/core-node-components/component.go
new file mode 100644
index 00000000000..5a94339a5f2
--- /dev/null
+++ b/dashboard-lib/core-node-components/component.go
@@ -0,0 +1,172 @@
+package core_node_components
+
+import (
+	"github.com/K-Phoen/grabana/dashboard"
+	"github.com/K-Phoen/grabana/row"
+	"github.com/K-Phoen/grabana/stat"
+	"github.com/K-Phoen/grabana/table"
+	"github.com/K-Phoen/grabana/target/prometheus"
+	"github.com/K-Phoen/grabana/timeseries"
+	"github.com/K-Phoen/grabana/timeseries/axis"
+	"github.com/K-Phoen/grabana/variable/interval"
+	"github.com/K-Phoen/grabana/variable/query"
+)
+
+type Props struct {
+	PrometheusDataSource string
+	PlatformOpts         PlatformOpts
+}
+
+func vars(p Props) []dashboard.Option {
+	return []dashboard.Option{
+		dashboard.VariableAsInterval(
+			"interval",
+			interval.Values([]string{"30s", "1m", "5m", "15m", "30m", "1h", "6h", "12h"}),
+		),
+		dashboard.VariableAsQuery(
+			"env",
+			query.DataSource(p.PrometheusDataSource),
+			query.Multiple(),
+			query.IncludeAll(),
+			query.Request("label_values(up, env)"),
+			query.Sort(query.NumericalAsc),
+		),
+		dashboard.VariableAsQuery(
+			"cluster",
+			query.DataSource(p.PrometheusDataSource),
+			query.Multiple(),
+			query.IncludeAll(),
+			query.Request("label_values(up{env=\"$env\"}, cluster)"),
+			query.Sort(query.NumericalAsc),
+		),
+		dashboard.VariableAsQuery(
+			"blockchain",
+			query.DataSource(p.PrometheusDataSource),
+			query.Multiple(),
+			query.IncludeAll(),
+			query.Request("label_values(up{env=\"$env\", cluster=\"$cluster\"}, blockchain)"),
+			query.Sort(query.NumericalAsc),
+		),
+		dashboard.VariableAsQuery(
+			"product",
+			query.DataSource(p.PrometheusDataSource),
+			query.Multiple(),
+			query.IncludeAll(),
+			query.Request("label_values(up{env=\"$env\", cluster=\"$cluster\", blockchain=\"$blockchain\"}, product)"),
+			query.Sort(query.NumericalAsc),
+		),
+		dashboard.VariableAsQuery(
+			"network_type",
+			query.DataSource(p.PrometheusDataSource),
+			query.Multiple(),
+			query.IncludeAll(),
+			query.Request("label_values(up{env=\"$env\", cluster=\"$cluster\", blockchain=\"$blockchain\", product=\"$product\"}, network_type)"),
+			query.Sort(query.NumericalAsc),
+		),
+		dashboard.VariableAsQuery(
+			"component",
+			query.DataSource(p.PrometheusDataSource),
+			query.Multiple(),
+			query.IncludeAll(),
+			query.Request("label_values(up{env=\"$env\", cluster=\"$cluster\", blockchain=\"$blockchain\", network_type=\"$network_type\"}, component)"),
+			query.Sort(query.NumericalAsc),
+		),
+		dashboard.VariableAsQuery(
+			"service",
+			query.DataSource(p.PrometheusDataSource),
+			query.Multiple(),
+			query.IncludeAll(),
+			query.Request("label_values(up{env=\"$env\", cluster=\"$cluster\", blockchain=\"$blockchain\", network_type=\"$network_type\", component=\"$component\"}, service)"),
+			query.Sort(query.NumericalAsc),
+		),
+		dashboard.VariableAsQuery(
+			"service_id",
+			query.DataSource(p.PrometheusDataSource),
+			query.Multiple(),
+			query.IncludeAll(),
+			query.Request("label_values(health{env=\"$env\", cluster=\"$cluster\", blockchain=\"$blockchain\", network_type=\"$network_type\", component=\"$component\", service=\"$service\"}, service_id)"),
+			query.Sort(query.NumericalAsc),
+		),
+	}
+}
+
+func generalInfoRow(p Props) []dashboard.Option {
+	return []dashboard.Option{
+		dashboard.Row(
+			"General CL Cluster Info",
+			// row.Collapse(),
+			row.WithTable(
+				"List Nodes",
+				table.Span(12),
+				table.HideColumn("Time"),
+				table.HideColumn("Value"),
+				table.DataSource(p.PrometheusDataSource),
+				table.WithPrometheusTarget(
+					`max(up{`+p.PlatformOpts.LabelQuery+`}) by (env, cluster, blockchain, product, network_type, network, version, team, component, service)`,
+					prometheus.Legend(""),
+					prometheus.Format("table"),
+					prometheus.Instant(),
+				),
+			),
+			row.WithTimeSeries(
+				"Uptime",
+				timeseries.Span(12),
+				timeseries.Height("200px"),
+				timeseries.DataSource(p.PrometheusDataSource),
+				timeseries.Axis(
+					axis.Max(1),
+					axis.Min(0),
+					axis.Unit("bool"),
+					axis.Label("Alive"),
+				),
+				timeseries.WithPrometheusTarget(
+					`up{`+p.PlatformOpts.LabelQuery+`}`,
+					prometheus.Legend(""),
+				),
+			),
+			row.WithTimeSeries(
+				"Service Components Health by Service",
+				timeseries.Span(12),
+				timeseries.Height("200px"),
+				timeseries.DataSource(p.PrometheusDataSource),
+				timeseries.Legend(timeseries.ToTheRight),
+				timeseries.WithPrometheusTarget(
+					`health{`+p.PlatformOpts.LabelQuery+`service_id=~"${service_id}"}`,
+					prometheus.Legend("{{service_id}}"),
+				),
+			),
+			row.WithTimeSeries(
+				"Service Components Health Avg by Service",
+				timeseries.Span(12),
+				timeseries.Height("200px"),
+				timeseries.DataSource(p.PrometheusDataSource),
+				timeseries.Legend(timeseries.ToTheRight),
+				timeseries.WithPrometheusTarget(
+					`avg(avg_over_time(health{`+p.PlatformOpts.LabelQuery+`service_id=~"${service_id}"}[$interval])) by (service_id, version, service, cluster, env)`,
+					prometheus.Legend("{{service_id}}"),
+				),
+			),
+			row.WithStat(
+				"Service Components Health Avg by Service",
+				stat.Span(12),
+				stat.Height("200px"),
+				stat.DataSource(p.PrometheusDataSource),
+				stat.Text(stat.TextValueAndName),
+				stat.Orientation(stat.OrientationVertical),
+				stat.SparkLine(),
+				stat.TitleFontSize(4),
+				stat.ValueFontSize(12),
+				stat.WithPrometheusTarget(
+					`avg(avg_over_time(health{`+p.PlatformOpts.LabelQuery+`service_id=~"${service_id}"}[$interval])) by (service_id, version, service, cluster, env)`,
+					prometheus.Legend("{{service_id}}"),
+				),
+			),
+		),
+	}
+}
+
+func New(p Props) []dashboard.Option {
+	opts := vars(p)
+	opts = append(opts, generalInfoRow(p)...)
+	return opts
+}
diff --git a/dashboard-lib/core-node-components/platform.go b/dashboard-lib/core-node-components/platform.go
new file mode 100644
index 00000000000..4568ab4ce9e
--- /dev/null
+++ b/dashboard-lib/core-node-components/platform.go
@@ -0,0 +1,52 @@
+package core_node_components
+
+import "fmt"
+
+type PlatformOpts struct {
+	// Platform is infrastructure deployment platform: docker or k8s
+	Platform     string
+	LabelFilters map[string]string
+	LabelFilter  string
+	LegendString string
+	LabelQuery   string
+}
+
+// PlatformPanelOpts generate different queries for "docker" and "k8s" deployment platforms
+func PlatformPanelOpts(platform string) PlatformOpts {
+	po := PlatformOpts{
+		LabelFilters: map[string]string{
+			"instance": `=~"${instance}"`,
+			"commit":   `=~"${commit:pipe}"`,
+		},
+	}
+	switch platform {
+	case "kubernetes":
+		po.LabelFilters = map[string]string{
+			// "namespace": `=~"${namespace}"`,
+			// "pod": `=~"${pod}"`,
+			"env":          `=~"${env}"`,
+			"cluster":      `=~"${cluster}"`,
+			"blockchain":   `=~"${blockchain}"`,
+			"product":      `=~"${product}"`,
+			"network_type": `=~"${network_type}"`,
+			"component":    `=~"${component}"`,
+			"service":      `=~"${service}"`,
+		}
+		po.LabelFilter = "job"
+		po.LegendString = "pod"
+		break
+	case "docker":
+		po.LabelFilters = map[string]string{
+			"instance": `=~"${instance}"`,
+		}
+		po.LabelFilter = "instance"
+		po.LegendString = "instance"
+		break
+	default:
+		panic(fmt.Sprintf("failed to generate Platform dependent queries, unknown platform: %s", platform))
+	}
+	for key, value := range po.LabelFilters {
+		po.LabelQuery += key + value + ", "
+	}
+	return po
+}
From 34c201c2974ca161cb9a17939cd891ba09d2f464 Mon Sep 17 00:00:00 2001
From: Clement Erena
Date: Wed, 17 Apr 2024 16:58:27 +0200
Subject: [PATCH 2/2] feat(dashboard): core_components cleanup code

---
Review notes:
- Every chained template variable enables IncludeAll (service_id is also
  Multiple), so the label_values() requests now use the regex matcher (=~)
  instead of the exact matcher (=) when referencing $env, $cluster, etc.
- NOTE(review): LabelQuery is still built by ranging over a map, so the label
  order in generated queries can differ between runs; consider sorting keys.

 .../chainlink-cluster/dashboard/cmd/deploy.go | 13 ++++++-
 .../core-node-components/component.go         | 37 ++++++++++---------
 .../core-node-components/platform.go          | 27 +-------------
 3 files changed, 34 insertions(+), 43 deletions(-)

diff --git a/charts/chainlink-cluster/dashboard/cmd/deploy.go b/charts/chainlink-cluster/dashboard/cmd/deploy.go
index 24c3af4589b..6ee19b909a4 100644
--- a/charts/chainlink-cluster/dashboard/cmd/deploy.go
+++ b/charts/chainlink-cluster/dashboard/cmd/deploy.go
@@ -5,6 +5,7 @@ import (
 	lib "github.com/smartcontractkit/chainlink/dashboard-lib"
 	atlas_don "github.com/smartcontractkit/chainlink/dashboard-lib/atlas-don"
 	core_don "github.com/smartcontractkit/chainlink/dashboard-lib/core-don"
+	core_node_components "github.com/smartcontractkit/chainlink/dashboard-lib/core-node-components"
 	k8spods "github.com/smartcontractkit/chainlink/dashboard-lib/k8s-pods"
 	waspdb "github.com/smartcontractkit/wasp/dashboard"
 	"strings"
@@ -30,6 +31,16 @@ func main() {
 		// TODO: refactor as a component later
 		addWASPRows(db, cfg)
 	}
+	if cfg.PanelsIncluded["core_components"] {
+		db.Add(
+			core_node_components.New(
+				core_node_components.Props{
+					PrometheusDataSource: cfg.DataSources.Prometheus,
+					PlatformOpts:         core_node_components.PlatformPanelOpts(),
+				},
+			),
+		)
+	}
 	if cfg.PanelsIncluded["ocr"] || cfg.PanelsIncluded["ocr2"] || cfg.PanelsIncluded["ocr3"] {
 		for key := range cfg.PanelsIncluded {
 			if strings.Contains(key, "ocr") {
@@ -45,7 +56,7 @@ func main() {
 			}
 		}
 	}
-	if cfg.Platform == "kubernetes" {
+	if !cfg.PanelsIncluded["core_components"] && cfg.Platform == "kubernetes" {
 		db.Add(
 			k8spods.New(
 				k8spods.Props{
diff --git a/dashboard-lib/core-node-components/component.go b/dashboard-lib/core-node-components/component.go
index 5a94339a5f2..47e23c073ba 100644
--- a/dashboard-lib/core-node-components/component.go
+++ b/dashboard-lib/core-node-components/component.go
@@ -22,11 +22,11 @@ func vars(p Props) []dashboard.Option {
 		dashboard.VariableAsInterval(
 			"interval",
 			interval.Values([]string{"30s", "1m", "5m", "15m", "30m", "1h", "6h", "12h"}),
+			interval.Default("15m"),
 		),
 		dashboard.VariableAsQuery(
 			"env",
 			query.DataSource(p.PrometheusDataSource),
-			query.Multiple(),
 			query.IncludeAll(),
 			query.Request("label_values(up, env)"),
 			query.Sort(query.NumericalAsc),
@@ -34,49 +34,43 @@ func vars(p Props) []dashboard.Option {
 		dashboard.VariableAsQuery(
 			"cluster",
 			query.DataSource(p.PrometheusDataSource),
-			query.Multiple(),
 			query.IncludeAll(),
-			query.Request("label_values(up{env=\"$env\"}, cluster)"),
+			query.Request(`label_values(up{env=~"$env"}, cluster)`),
 			query.Sort(query.NumericalAsc),
 		),
 		dashboard.VariableAsQuery(
 			"blockchain",
 			query.DataSource(p.PrometheusDataSource),
-			query.Multiple(),
 			query.IncludeAll(),
-			query.Request("label_values(up{env=\"$env\", cluster=\"$cluster\"}, blockchain)"),
+			query.Request(`label_values(up{env=~"$env", cluster=~"$cluster"}, blockchain)`),
 			query.Sort(query.NumericalAsc),
 		),
 		dashboard.VariableAsQuery(
 			"product",
 			query.DataSource(p.PrometheusDataSource),
-			query.Multiple(),
 			query.IncludeAll(),
-			query.Request("label_values(up{env=\"$env\", cluster=\"$cluster\", blockchain=\"$blockchain\"}, product)"),
+			query.Request(`label_values(up{env=~"$env", cluster=~"$cluster", blockchain=~"$blockchain"}, product)`),
 			query.Sort(query.NumericalAsc),
 		),
 		dashboard.VariableAsQuery(
 			"network_type",
 			query.DataSource(p.PrometheusDataSource),
-			query.Multiple(),
 			query.IncludeAll(),
-			query.Request("label_values(up{env=\"$env\", cluster=\"$cluster\", blockchain=\"$blockchain\", product=\"$product\"}, network_type)"),
+			query.Request(`label_values(up{env=~"$env", cluster=~"$cluster", blockchain=~"$blockchain", product=~"$product"}, network_type)`),
 			query.Sort(query.NumericalAsc),
 		),
 		dashboard.VariableAsQuery(
 			"component",
 			query.DataSource(p.PrometheusDataSource),
-			query.Multiple(),
 			query.IncludeAll(),
-			query.Request("label_values(up{env=\"$env\", cluster=\"$cluster\", blockchain=\"$blockchain\", network_type=\"$network_type\"}, component)"),
+			query.Request(`label_values(up{env=~"$env", cluster=~"$cluster", blockchain=~"$blockchain", network_type=~"$network_type"}, component)`),
 			query.Sort(query.NumericalAsc),
 		),
 		dashboard.VariableAsQuery(
 			"service",
 			query.DataSource(p.PrometheusDataSource),
-			query.Multiple(),
 			query.IncludeAll(),
-			query.Request("label_values(up{env=\"$env\", cluster=\"$cluster\", blockchain=\"$blockchain\", network_type=\"$network_type\", component=\"$component\"}, service)"),
+			query.Request(`label_values(up{env=~"$env", cluster=~"$cluster", blockchain=~"$blockchain", network_type=~"$network_type", component=~"$component"}, service)`),
 			query.Sort(query.NumericalAsc),
 		),
 		dashboard.VariableAsQuery(
@@ -84,7 +78,7 @@ func vars(p Props) []dashboard.Option {
 			query.DataSource(p.PrometheusDataSource),
 			query.Multiple(),
 			query.IncludeAll(),
-			query.Request("label_values(health{env=\"$env\", cluster=\"$cluster\", blockchain=\"$blockchain\", network_type=\"$network_type\", component=\"$component\", service=\"$service\"}, service_id)"),
+			query.Request(`label_values(health{env=~"$env", cluster=~"$cluster", blockchain=~"$blockchain", network_type=~"$network_type", component=~"$component", service=~"$service"}, service_id)`),
 			query.Sort(query.NumericalAsc),
 		),
 	}
@@ -94,7 +88,6 @@ func generalInfoRow(p Props) []dashboard.Option {
 	return []dashboard.Option{
 		dashboard.Row(
 			"General CL Cluster Info",
-			// row.Collapse(),
 			row.WithTable(
 				"List Nodes",
 				table.Span(12),
@@ -121,7 +114,8 @@ func generalInfoRow(p Props) []dashboard.Option {
 				),
 				timeseries.WithPrometheusTarget(
 					`up{`+p.PlatformOpts.LabelQuery+`}`,
-					prometheus.Legend(""),
+					//prometheus.Legend(""),
+					prometheus.Legend("Team: {{team}} env: {{env}} cluster: {{cluster}} namespace: {{namespace}} job: {{job}} blockchain: {{blockchain}} product: {{product}} networkType: {{network_type}} component: {{component}} service: {{service}}"),
 				),
 			),
 			row.WithTimeSeries(
@@ -154,12 +148,17 @@ func generalInfoRow(p Props) []dashboard.Option {
 				stat.Text(stat.TextValueAndName),
 				stat.Orientation(stat.OrientationVertical),
 				stat.SparkLine(),
-				stat.TitleFontSize(4),
+				stat.TitleFontSize(8),
 				stat.ValueFontSize(12),
 				stat.WithPrometheusTarget(
 					`avg(avg_over_time(health{`+p.PlatformOpts.LabelQuery+`service_id=~"${service_id}"}[$interval])) by (service_id, version, service, cluster, env)`,
 					prometheus.Legend("{{service_id}}"),
 				),
+				stat.AbsoluteThresholds([]stat.ThresholdStep{
+					{Color: "#FF0000", Value: float64Ptr(0.0)},
+					{Color: "#FF0000", Value: float64Ptr(0.8)},
+					{Color: "#008000", Value: float64Ptr(1.0)},
+				}),
 			),
 		),
 	}
@@ -170,3 +169,7 @@ func New(p Props) []dashboard.Option {
 	opts = append(opts, generalInfoRow(p)...)
 	return opts
 }
+
+func float64Ptr(input float64) *float64 {
+	return &input
+}
diff --git a/dashboard-lib/core-node-components/platform.go b/dashboard-lib/core-node-components/platform.go
index 4568ab4ce9e..9d85cf82c77 100644
--- a/dashboard-lib/core-node-components/platform.go
+++ b/dashboard-lib/core-node-components/platform.go
@@ -1,7 +1,5 @@
 package core_node_components

-import "fmt"
-
 type PlatformOpts struct {
 	// Platform is infrastructure deployment platform: docker or k8s
 	Platform     string
@@ -12,18 +10,9 @@ type PlatformOpts struct {
 }

 // PlatformPanelOpts generate different queries for "docker" and "k8s" deployment platforms
-func PlatformPanelOpts(platform string) PlatformOpts {
+func PlatformPanelOpts() PlatformOpts {
 	po := PlatformOpts{
 		LabelFilters: map[string]string{
-			"instance": `=~"${instance}"`,
-			"commit":   `=~"${commit:pipe}"`,
-		},
-	}
-	switch platform {
-	case "kubernetes":
-		po.LabelFilters = map[string]string{
-			// "namespace": `=~"${namespace}"`,
-			// "pod": `=~"${pod}"`,
 			"env":          `=~"${env}"`,
 			"cluster":      `=~"${cluster}"`,
 			"blockchain":   `=~"${blockchain}"`,
@@ -31,19 +20,7 @@ func PlatformPanelOpts(platform string) PlatformOpts {
 			"network_type": `=~"${network_type}"`,
 			"component":    `=~"${component}"`,
 			"service":      `=~"${service}"`,
-		}
-		po.LabelFilter = "job"
-		po.LegendString = "pod"
-		break
-	case "docker":
-		po.LabelFilters = map[string]string{
-			"instance": `=~"${instance}"`,
-		}
-		po.LabelFilter = "instance"
-		po.LegendString = "instance"
-		break
-	default:
-		panic(fmt.Sprintf("failed to generate Platform dependent queries, unknown platform: %s", platform))
+		},
 	}
 	for key, value := range po.LabelFilters {
 		po.LabelQuery += key + value + ", "