From bf96ee26f0cbdfc044b92b57533a2eda2d54c511 Mon Sep 17 00:00:00 2001 From: Patryk Prus Date: Tue, 8 Oct 2024 17:23:36 -0400 Subject: [PATCH 1/7] Collapse the two ingester autoscaling rows into a single row, visible if ingester autoscaling is enabled in the config --- .../mimir-mixin/dashboards/writes.libsonnet | 32 ------------------- 1 file changed, 32 deletions(-) diff --git a/operations/mimir-mixin/dashboards/writes.libsonnet b/operations/mimir-mixin/dashboards/writes.libsonnet index dc03b5931a1..a99ee9dedf1 100644 --- a/operations/mimir-mixin/dashboards/writes.libsonnet +++ b/operations/mimir-mixin/dashboards/writes.libsonnet @@ -368,38 +368,6 @@ local filename = 'mimir-writes.json'; ) .addRowIf( $._config.autoscaling.ingester.enabled, - $.row('Ingester – autoscaling') - .addPanel( - $.autoScalingActualReplicas('ingester') + { title: 'Replicas (leader zone)' } + - $.panelDescription( - 'Replicas (leader zone)', - ||| - The minimum, maximum, and current number of replicas for the leader zone of ingesters. - Other zones scale to follow this zone (with delay for downscale). 
- ||| - ) - ) - .addPanel( - $.timeseriesPanel('Replicas') + - $.panelDescription('Replicas', 'Number of ingester replicas per zone.') + - $.queryPanel( - [ - 'sum by (%s) (up{%s})' % [$._config.per_job_label, $.jobMatcher($._config.job_names.ingester)], - ], - [ - '{{ %(per_job_label)s }}' % $._config.per_job_label, - ], - ), - ) - .addPanel( - $.autoScalingDesiredReplicasByValueScalingMetricPanel('ingester', '', '') + { title: 'Desired replicas (leader zone)' } - ) - .addPanel( - $.autoScalingFailuresPanel('ingester') + { title: 'Autoscaler failures rate' } - ), - ) - .addRowIf( - $._config.show_ingest_storage_panels && $._config.autoscaling.ingester.enabled, $.row('Ingester – autoscaling (ingest storage)') .addPanel( $.autoScalingActualReplicas('ingester') + { title: 'Replicas (ReplicaTemplate)' } + From af34547c46029cef2ef92a57c0b2a2bd4a94a90f Mon Sep 17 00:00:00 2001 From: Patryk Prus Date: Tue, 15 Oct 2024 01:28:32 -0400 Subject: [PATCH 2/7] Add replica_template_name to jsonnet --- operations/mimir-mixin/config.libsonnet | 1 + 1 file changed, 1 insertion(+) diff --git a/operations/mimir-mixin/config.libsonnet b/operations/mimir-mixin/config.libsonnet index 9542769a68f..e3527e1d876 100644 --- a/operations/mimir-mixin/config.libsonnet +++ b/operations/mimir-mixin/config.libsonnet @@ -653,6 +653,7 @@ ingester: { enabled: false, hpa_name: $._config.autoscaling_hpa_prefix + 'ingester-zone-a', + replica_template_name: 'ingester-zone-a', }, compactor: { enabled: false, From 0b63468f6bf03fdb7e413db9eb705e78d1c6069c Mon Sep 17 00:00:00 2001 From: Patryk Prus Date: Tue, 15 Oct 2024 01:52:16 -0400 Subject: [PATCH 3/7] Update autoscaling panels with replicaTemplate and read-only queries --- .../dashboards/dashboard-utils.libsonnet | 6 +-- .../mimir-mixin/dashboards/writes.libsonnet | 52 +++++++++++++++---- 2 files changed, 46 insertions(+), 12 deletions(-) diff --git a/operations/mimir-mixin/dashboards/dashboard-utils.libsonnet 
b/operations/mimir-mixin/dashboards/dashboard-utils.libsonnet index 1b2f559cfc9..3617fa78b78 100644 --- a/operations/mimir-mixin/dashboards/dashboard-utils.libsonnet +++ b/operations/mimir-mixin/dashboards/dashboard-utils.libsonnet @@ -617,7 +617,7 @@ local utils = import 'mixin-utils/utils.libsonnet'; ), // The provided componentName should be the name of a component among the ones defined in $._config.autoscaling. - autoScalingActualReplicas(componentName):: + autoScalingActualReplicas(componentName, addlQueries=[], addlLegends=[]):: local title = 'Replicas'; local componentTitle = std.strReplace(componentName, '_', '-'); @@ -660,12 +660,12 @@ local utils = import 'mixin-utils/utils.libsonnet'; hpa_name: $._config.autoscaling[componentName].hpa_name, cluster_labels: std.join(', ', $._config.cluster_labels), }, - ], + ] + addlQueries, [ 'Max {{ scaletargetref_name }}', 'Current {{ scaletargetref_name }}', 'Min {{ scaletargetref_name }}', - ], + ] + addlLegends, ) + $.panelDescription( title, diff --git a/operations/mimir-mixin/dashboards/writes.libsonnet b/operations/mimir-mixin/dashboards/writes.libsonnet index a99ee9dedf1..17bff03d55f 100644 --- a/operations/mimir-mixin/dashboards/writes.libsonnet +++ b/operations/mimir-mixin/dashboards/writes.libsonnet @@ -368,26 +368,60 @@ local filename = 'mimir-writes.json'; ) .addRowIf( $._config.autoscaling.ingester.enabled, - $.row('Ingester – autoscaling (ingest storage)') + $.row('Ingester – autoscaling') .addPanel( - $.autoScalingActualReplicas('ingester') + { title: 'Replicas (ReplicaTemplate)' } + + local replicaTemplateQueries = [ + ||| + max by (name) ( + kube_customresource_replicatemplate_spec_replicas{%(namespace_matcher)s, name=~"%(replica_template_name)s"} + ) + ||| % { + namespace_matcher: $.namespaceMatcher(), + replica_template_name: $._config.autoscaling.ingester.replica_template_name, + }, + ||| + max by (name) ( + kube_customresource_replicatemplate_status_replicas{%(namespace_matcher)s, 
name=~"%(replica_template_name)s"} + ) + ||| % { + namespace_matcher: $.namespaceMatcher(), + replica_template_name: $._config.autoscaling.ingester.replica_template_name, + }, + ]; + + local replicaTemplateLegends = [ + 'Tmpl spec replicas', + 'Tmpl status replicas', + ]; + + $.autoScalingActualReplicas('ingester', replicaTemplateQueries, replicaTemplateLegends) + { title: 'Replicas (HPA + ReplicaTemplate)' } + $.panelDescription( - 'Replicas (ReplicaTemplate)', + 'Replicas (HPA + ReplicaTemplate)', ||| - The minimum, maximum, and current number of replicas for the ReplicaTemplate object. - Rollout-operator will keep ingester replicas updated based on this object. + The minimum, maximum, and current number of replicas reported by the HPA for the ReplicaTemplate object. + If available, also the spec and status replicas fields for the ReplicaTemplate object itself. + Rollout-operator will keep ingester replicas updated based on the ReplicaTemplate spec field, and then update the template's status field once the ingester count changes. ||| ) ) .addPanel( - $.timeseriesPanel('Replicas') + - $.panelDescription('Replicas', 'Number of ingester replicas.') + - $.queryPanel( + $.timeseriesPanel('Replicas (Ingesters)') + + $.panelDescription( + 'Replicas (Ingesters)', + ||| + Number of up ingester replicas per zone. + Also show the number of read-only replicas per zone, or number of Inactive partitions for ingest storage. 
+ ||| + ) + $.queryPanel( [ 'sum by (%s) (up{%s})' % [$._config.per_job_label, $.jobMatcher($._config.job_names.ingester)], + 'sum by (%s) (cortex_lifecycler_read_only{%s}) unless on (%s) (cortex_partition_ring_partitions{name="ingester-partitions"})' % [$._config.per_job_label, $.jobMatcher($._config.job_names.ingester), $._config.per_job_label], // TODO: need to not show this if using ingest storage + 'max by (name) (cortex_partition_ring_partitions{%s,name="ingester-partitions",state="Inactive"})' % [$.namespaceMatcher()], ], [ - '{{ %(per_job_label)s }}' % $._config.per_job_label, + 'up ({{ %(per_job_label)s }})' % $._config.per_job_label, + 'read-only ({{ %(per_job_label)s }})' % $._config.per_job_label, + 'inactive partitions', ], ), ) From 6cc46fd36ec2971f3ad5bf8dd1ed718716699e67 Mon Sep 17 00:00:00 2001 From: Patryk Prus Date: Tue, 15 Oct 2024 02:02:14 -0400 Subject: [PATCH 4/7] Update CHANGELOG --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 23c8b77bf9d..fd966f2506b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -49,6 +49,7 @@ ### Mixin +* [ENHANCEMENT] Unify ingester autoscaling panels on 'Mimir / Writes' dashboard to work for both ingest-storage and non-ingest-storage autoscaling. #9617 * [BUGFIX] Dashboards: Fix autoscaling metrics joins when series churn. #9412 #9450 #9432 * [BUGFIX] Alerts: Fix autoscaling metrics joins in `MimirAutoscalerNotActive` when series churn. 
#9412 From 577a7a2d71ed9cb004b05f35f791045ffef37370 Mon Sep 17 00:00:00 2001 From: Patryk Prus Date: Tue, 15 Oct 2024 10:13:54 -0400 Subject: [PATCH 5/7] Update operations/mimir-mixin/dashboards/writes.libsonnet Co-authored-by: Marco Pracucci --- operations/mimir-mixin/dashboards/writes.libsonnet | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/operations/mimir-mixin/dashboards/writes.libsonnet b/operations/mimir-mixin/dashboards/writes.libsonnet index 17bff03d55f..070c9f45232 100644 --- a/operations/mimir-mixin/dashboards/writes.libsonnet +++ b/operations/mimir-mixin/dashboards/writes.libsonnet @@ -390,8 +390,8 @@ local filename = 'mimir-writes.json'; ]; local replicaTemplateLegends = [ - 'Tmpl spec replicas', - 'Tmpl status replicas', + 'Template spec replicas', + 'Template status replicas', ]; $.autoScalingActualReplicas('ingester', replicaTemplateQueries, replicaTemplateLegends) + { title: 'Replicas (HPA + ReplicaTemplate)' } + From 1ec77341f57a35032670efbcca860c4efffb85e1 Mon Sep 17 00:00:00 2001 From: Patryk Prus Date: Tue, 15 Oct 2024 10:15:48 -0400 Subject: [PATCH 6/7] Clean up TODO comment --- operations/mimir-mixin/dashboards/writes.libsonnet | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/operations/mimir-mixin/dashboards/writes.libsonnet b/operations/mimir-mixin/dashboards/writes.libsonnet index 070c9f45232..580b6658dfe 100644 --- a/operations/mimir-mixin/dashboards/writes.libsonnet +++ b/operations/mimir-mixin/dashboards/writes.libsonnet @@ -415,7 +415,7 @@ local filename = 'mimir-writes.json'; ) + $.queryPanel( [ 'sum by (%s) (up{%s})' % [$._config.per_job_label, $.jobMatcher($._config.job_names.ingester)], - 'sum by (%s) (cortex_lifecycler_read_only{%s}) unless on (%s) (cortex_partition_ring_partitions{name="ingester-partitions"})' % [$._config.per_job_label, $.jobMatcher($._config.job_names.ingester), $._config.per_job_label], // TODO: need to not show this if using ingest storage + 'sum by (%s) 
(cortex_lifecycler_read_only{%s}) unless on (%s) (cortex_partition_ring_partitions{name="ingester-partitions"})' % [$._config.per_job_label, $.jobMatcher($._config.job_names.ingester), $._config.per_job_label], 'max by (name) (cortex_partition_ring_partitions{%s,name="ingester-partitions",state="Inactive"})' % [$.namespaceMatcher()], ], [ From d3f47266487ff63d5707affa136a3b75a6ed3d93 Mon Sep 17 00:00:00 2001 From: Patryk Prus Date: Tue, 15 Oct 2024 11:17:22 -0400 Subject: [PATCH 7/7] Remove unnecessary 'by' clauses --- operations/mimir-mixin/dashboards/writes.libsonnet | 14 +++----------- 1 file changed, 3 insertions(+), 11 deletions(-) diff --git a/operations/mimir-mixin/dashboards/writes.libsonnet b/operations/mimir-mixin/dashboards/writes.libsonnet index 580b6658dfe..ca7f73b7487 100644 --- a/operations/mimir-mixin/dashboards/writes.libsonnet +++ b/operations/mimir-mixin/dashboards/writes.libsonnet @@ -371,19 +371,11 @@ local filename = 'mimir-writes.json'; $.row('Ingester – autoscaling') .addPanel( local replicaTemplateQueries = [ - ||| - max by (name) ( - kube_customresource_replicatemplate_spec_replicas{%(namespace_matcher)s, name=~"%(replica_template_name)s"} - ) - ||| % { + 'max(kube_customresource_replicatemplate_spec_replicas{%(namespace_matcher)s, name=~"%(replica_template_name)s"})' % { namespace_matcher: $.namespaceMatcher(), replica_template_name: $._config.autoscaling.ingester.replica_template_name, }, - ||| - max by (name) ( - kube_customresource_replicatemplate_status_replicas{%(namespace_matcher)s, name=~"%(replica_template_name)s"} - ) - ||| % { + 'max(kube_customresource_replicatemplate_status_replicas{%(namespace_matcher)s, name=~"%(replica_template_name)s"})' % { namespace_matcher: $.namespaceMatcher(), replica_template_name: $._config.autoscaling.ingester.replica_template_name, }, @@ -416,7 +408,7 @@ local filename = 'mimir-writes.json'; [ 'sum by (%s) (up{%s})' % [$._config.per_job_label, $.jobMatcher($._config.job_names.ingester)], 'sum by 
(%s) (cortex_lifecycler_read_only{%s}) unless on (%s) (cortex_partition_ring_partitions{name="ingester-partitions"})' % [$._config.per_job_label, $.jobMatcher($._config.job_names.ingester), $._config.per_job_label], - 'max by (name) (cortex_partition_ring_partitions{%s,name="ingester-partitions",state="Inactive"})' % [$.namespaceMatcher()], + 'max(cortex_partition_ring_partitions{%s,name="ingester-partitions",state="Inactive"})' % [$.namespaceMatcher()], ], [ 'up ({{ %(per_job_label)s }})' % $._config.per_job_label,