diff --git a/charts/chainlink-cluster/dashboard/cmd/dashboard_deploy.go b/charts/chainlink-cluster/dashboard/cmd/dashboard_deploy.go index a96df835447..a818ff44095 100644 --- a/charts/chainlink-cluster/dashboard/cmd/dashboard_deploy.go +++ b/charts/chainlink-cluster/dashboard/cmd/dashboard_deploy.go @@ -4,6 +4,7 @@ import ( "fmt" "github.com/smartcontractkit/chainlink/charts/chainlink-cluster/dashboard/dashboard" "os" + "strings" ) func main() { @@ -11,8 +12,11 @@ func main() { if name == "" { panic("DASHBOARD_NAME must be provided") } - // Can be empty + lokiDataSourceName := os.Getenv("LOKI_DATA_SOURCE_NAME") + if lokiDataSourceName == "" { + fmt.Println("LOKI_DATA_SOURCE_NAME is empty, panels with logs will be disabled") + } prometheusDataSourceName := os.Getenv("PROMETHEUS_DATA_SOURCE_NAME") if prometheusDataSourceName == "" { @@ -39,6 +43,13 @@ func main() { panic("INFRA_PLATFORM must be provided, can be either docker|kubernetes") } + panelsIncluded := os.Getenv("PANELS_INCLUDED") + // can be empty + if panelsIncluded == "" { + fmt.Println("PANELS_INCLUDED can be provided to specify panels groups, value must be separated by comma. 
Possible values are: core, wasp") + } + panelsIncludedArray := strings.Split(panelsIncluded, ",") + err := dashboard.NewDashboard( name, grafanaURL, @@ -48,7 +59,7 @@ func main() { lokiDataSourceName, prometheusDataSourceName, infraPlatform, - []string{"core"}, + panelsIncludedArray, nil, ) if err != nil { diff --git a/charts/chainlink-cluster/dashboard/dashboard.go b/charts/chainlink-cluster/dashboard/dashboard.go index cc128fea15b..47c0d4fcb31 100644 --- a/charts/chainlink-cluster/dashboard/dashboard.go +++ b/charts/chainlink-cluster/dashboard/dashboard.go @@ -57,7 +57,9 @@ func NewDashboard( } db.init() db.addCoreVariables() - db.addCorePanels() + if Contains(db.panels, "core") { + db.addCorePanels() + } switch db.platform { case "kubernetes": diff --git a/charts/chainlink-cluster/dashboard/panels.go b/charts/chainlink-cluster/dashboard/panels.go index 8a5abb8d450..b130860f27d 100644 --- a/charts/chainlink-cluster/dashboard/panels.go +++ b/charts/chainlink-cluster/dashboard/panels.go @@ -12,10 +12,107 @@ import ( ) func (m *Dashboard) addMainPanels() { - var podRestartPanel row.Option = nil var ethBalancePanelSpanSize float32 = 6 + var panelsIncluded []row.Option + var goVersionLegend string = "version" + if m.platform == "kubernetes" { - podRestartPanel = row.WithStat( + ethBalancePanelSpanSize = 4 + goVersionLegend = "exported_version" + } + + globalInfoPanels := []row.Option{ + row.WithStat( + "App Version", + stat.DataSource(m.PrometheusDataSourceName), + stat.Text(stat.TextValueAndName), + stat.Orientation(stat.OrientationVertical), + stat.TitleFontSize(12), + stat.ValueFontSize(20), + stat.Span(2), + stat.Text("name"), + stat.Height("100px"), + stat.WithPrometheusTarget( + `version{`+m.panelOption.labelFilter+`=~"$instance"}`, + prometheus.Legend("{{version}}"), + ), + ), + row.WithStat( + "Go Version", + stat.DataSource(m.PrometheusDataSourceName), + stat.Text(stat.TextValueAndName), + stat.Orientation(stat.OrientationVertical), + stat.TitleFontSize(12), 
+ stat.ValueFontSize(20), + stat.Span(2), + stat.Text("name"), + stat.Height("100px"), + stat.WithPrometheusTarget( + `go_info{`+m.panelOption.labelFilter+`=~"$instance"}`, + prometheus.Legend("{{"+goVersionLegend+"}}"), + ), + ), + row.WithStat( + "Uptime in days", + stat.DataSource(m.PrometheusDataSourceName), + stat.Text(stat.TextValueAndName), + stat.Orientation(stat.OrientationVertical), + stat.TitleFontSize(12), + stat.ValueFontSize(20), + stat.Span(2), + stat.Height("100px"), + stat.WithPrometheusTarget( + `uptime_seconds{`+m.panelOption.labelFilter+`=~"$instance"} / 86400`, + prometheus.Legend("{{"+m.panelOption.labelFilter+"}}"), + ), + ), + row.WithStat( + "ETH Balance", + stat.DataSource(m.PrometheusDataSourceName), + stat.Text(stat.TextValueAndName), + stat.Orientation(stat.OrientationVertical), + stat.TitleFontSize(12), + stat.ValueFontSize(20), + stat.Span(ethBalancePanelSpanSize), + stat.Height("100px"), + stat.Decimals(2), + stat.WithPrometheusTarget( + `eth_balance{`+m.panelOption.labelFilter+`=~"$instance"}`, + prometheus.Legend("{{"+m.panelOption.labelFilter+"}} - {{account}}"), + ), + ), + } + + additionalPanels := []row.Option{ + row.WithTimeSeries( + "Service Components Health", + timeseries.Span(12), + timeseries.Height("200px"), + timeseries.DataSource(m.PrometheusDataSourceName), + timeseries.WithPrometheusTarget( + `health{`+m.panelOption.labelFilter+`=~"$instance"}`, + prometheus.Legend("{{"+m.panelOption.labelFilter+"}} - {{service_id}}"), + ), + ), + row.WithTimeSeries( + "ETH Balance", + timeseries.Span(12), + timeseries.Height("200px"), + timeseries.DataSource(m.PrometheusDataSourceName), + timeseries.Axis( + axis.Unit(""), + axis.Decimals(2), + ), + timeseries.WithPrometheusTarget( + `eth_balance{`+m.panelOption.labelFilter+`=~"$instance"}`, + prometheus.Legend("{{"+m.panelOption.labelFilter+"}} - {{account}}"), + ), + ), + } + + panelsIncluded = append(panelsIncluded, globalInfoPanels...) 
+ if m.platform == "kubernetes" { + panelsIncluded = append(panelsIncluded, row.WithStat( "Pod Restarts", stat.Span(2), stat.Height("100px"), @@ -24,97 +121,14 @@ func (m *Dashboard) addMainPanels() { `sum(increase(kube_pod_container_status_restarts_total{pod=~"$instance.*", namespace=~"${namespace}"}[$__rate_interval])) by (pod)`, prometheus.Legend("{{pod}}"), ), - ) - ethBalancePanelSpanSize = 4 + )) } + panelsIncluded = append(panelsIncluded, additionalPanels...) opts := []dashboard.Option{ dashboard.Row( "Global health", - row.WithStat( - "App Version", - stat.DataSource(m.PrometheusDataSourceName), - stat.Text(stat.TextValueAndName), - stat.Orientation(stat.OrientationVertical), - stat.TitleFontSize(12), - stat.ValueFontSize(20), - stat.Span(2), - stat.Text("name"), - stat.Height("100px"), - stat.WithPrometheusTarget( - `version{`+m.panelOption.labelFilter+`=~"$instance"}`, - prometheus.Legend("{{version}}"), - ), - ), - row.WithStat( - "Go Version", - stat.DataSource(m.PrometheusDataSourceName), - stat.Text(stat.TextValueAndName), - stat.Orientation(stat.OrientationVertical), - stat.TitleFontSize(12), - stat.ValueFontSize(20), - stat.Span(2), - stat.Text("name"), - stat.Height("100px"), - stat.WithPrometheusTarget( - `go_info{`+m.panelOption.labelFilter+`=~"$instance"}`, - prometheus.Legend("{{exported_version}}"), - ), - ), - row.WithStat( - "Uptime in days", - stat.DataSource(m.PrometheusDataSourceName), - stat.Text(stat.TextValueAndName), - stat.Orientation(stat.OrientationVertical), - stat.TitleFontSize(12), - stat.ValueFontSize(20), - stat.Span(2), - stat.Height("100px"), - stat.WithPrometheusTarget( - `uptime_seconds{`+m.panelOption.labelFilter+`=~"$instance"} / 86400`, - prometheus.Legend("{{"+m.panelOption.labelFilter+"}}"), - ), - ), - row.WithStat( - "ETH Balance", - stat.DataSource(m.PrometheusDataSourceName), - stat.Text(stat.TextValueAndName), - stat.Orientation(stat.OrientationVertical), - stat.TitleFontSize(12), - stat.ValueFontSize(20), - 
stat.Span(ethBalancePanelSpanSize), - stat.Height("100px"), - stat.Decimals(2), - stat.WithPrometheusTarget( - `eth_balance{`+m.panelOption.labelFilter+`=~"$instance"}`, - prometheus.Legend("{{"+m.panelOption.labelFilter+"}} - {{account}}"), - ), - ), - podRestartPanel, - row.WithTimeSeries( - "Service Components Health", - timeseries.Span(12), - timeseries.Height("200px"), - timeseries.DataSource(m.PrometheusDataSourceName), - timeseries.WithPrometheusTarget( - `health{`+m.panelOption.labelFilter+`=~"$instance"}`, - prometheus.Legend("{{"+m.panelOption.labelFilter+"}} - {{service_id}}"), - ), - ), - row.WithTimeSeries( - "ETH Balance", - timeseries.Span(12), - timeseries.Height("200px"), - timeseries.DataSource(m.PrometheusDataSourceName), - timeseries.Axis( - axis.Unit(""), - axis.Decimals(2), - ), - timeseries.WithPrometheusTarget( - `eth_balance{`+m.panelOption.labelFilter+`=~"$instance"}`, - prometheus.Legend("{{"+m.panelOption.labelFilter+"}} - {{account}}"), - ), - ), + panelsIncluded..., ), } @@ -127,7 +141,7 @@ func (m *Dashboard) addKubePanels() { "Pod health", row.WithTimeSeries( "Pod Restarts", - timeseries.Span(12), + timeseries.Span(4), timeseries.Height("200px"), timeseries.DataSource(m.PrometheusDataSourceName), timeseries.WithPrometheusTarget( @@ -135,6 +149,26 @@ func (m *Dashboard) addKubePanels() { prometheus.Legend("{{pod}}"), ), ), + row.WithTimeSeries( + "OOM Events", + timeseries.Span(4), + timeseries.Height("200px"), + timeseries.DataSource(m.PrometheusDataSourceName), + timeseries.WithPrometheusTarget( + `sum(increase(container_oom_events_total{pod=~"$instance.*", namespace=~"${namespace}"}[$__rate_interval])) by (pod)`, + prometheus.Legend("{{pod}}"), + ), + ), + row.WithTimeSeries( + "OOM Killed", + timeseries.Span(4), + timeseries.Height("200px"), + timeseries.DataSource(m.PrometheusDataSourceName), + timeseries.WithPrometheusTarget( + `sum(increase(kube_pod_container_status_last_terminated_reason{reason="OOMKilled", pod=~"$instance.*", 
// Contains reports whether x occurs in arr.
//
// Elements are compared with ==, so T must satisfy comparable. A nil or
// empty slice never contains x, and the scan stops at the first match.
func Contains[T comparable](arr []T, x T) bool {
	found := false
	// Walk by index and bail out of the loop as soon as a match is seen.
	for idx := 0; idx < len(arr) && !found; idx++ {
		found = arr[idx] == x
	}
	return found
}