diff --git a/CHANGELOG.md b/CHANGELOG.md index 23c8b77bf9..fd966f2506 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -49,6 +49,7 @@ ### Mixin +* [ENHANCEMENT] Dashboards: Unify ingester autoscaling panels on the 'Mimir / Writes' dashboard to work for both ingest-storage and non-ingest-storage autoscaling. #9617 * [BUGFIX] Dashboards: Fix autoscaling metrics joins when series churn. #9412 #9450 #9432 * [BUGFIX] Alerts: Fix autoscaling metrics joins in `MimirAutoscalerNotActive` when series churn. #9412 diff --git a/operations/mimir-mixin/config.libsonnet b/operations/mimir-mixin/config.libsonnet index 9542769a68..e3527e1d87 100644 --- a/operations/mimir-mixin/config.libsonnet +++ b/operations/mimir-mixin/config.libsonnet @@ -653,6 +653,7 @@ ingester: { enabled: false, hpa_name: $._config.autoscaling_hpa_prefix + 'ingester-zone-a', + replica_template_name: 'ingester-zone-a', }, compactor: { enabled: false, diff --git a/operations/mimir-mixin/dashboards/dashboard-utils.libsonnet b/operations/mimir-mixin/dashboards/dashboard-utils.libsonnet index 1b2f559cfc..3617fa78b7 100644 --- a/operations/mimir-mixin/dashboards/dashboard-utils.libsonnet +++ b/operations/mimir-mixin/dashboards/dashboard-utils.libsonnet @@ -617,7 +617,7 @@ local utils = import 'mixin-utils/utils.libsonnet'; ), // The provided componentName should be the name of a component among the ones defined in $._config.autoscaling. 
- autoScalingActualReplicas(componentName):: + autoScalingActualReplicas(componentName, addlQueries=[], addlLegends=[]):: local title = 'Replicas'; local componentTitle = std.strReplace(componentName, '_', '-'); @@ -660,12 +660,12 @@ local utils = import 'mixin-utils/utils.libsonnet'; hpa_name: $._config.autoscaling[componentName].hpa_name, cluster_labels: std.join(', ', $._config.cluster_labels), }, - ], + ] + addlQueries, [ 'Max {{ scaletargetref_name }}', 'Current {{ scaletargetref_name }}', 'Min {{ scaletargetref_name }}', - ], + ] + addlLegends, ) + $.panelDescription( title, diff --git a/operations/mimir-mixin/dashboards/writes.libsonnet b/operations/mimir-mixin/dashboards/writes.libsonnet index dc03b5931a..ca7f73b748 100644 --- a/operations/mimir-mixin/dashboards/writes.libsonnet +++ b/operations/mimir-mixin/dashboards/writes.libsonnet @@ -370,56 +370,50 @@ local filename = 'mimir-writes.json'; $._config.autoscaling.ingester.enabled, $.row('Ingester – autoscaling') .addPanel( - $.autoScalingActualReplicas('ingester') + { title: 'Replicas (leader zone)' } + + local replicaTemplateQueries = [ + 'max(kube_customresource_replicatemplate_spec_replicas{%(namespace_matcher)s, name=~"%(replica_template_name)s"})' % { + namespace_matcher: $.namespaceMatcher(), + replica_template_name: $._config.autoscaling.ingester.replica_template_name, + }, + 'max(kube_customresource_replicatemplate_status_replicas{%(namespace_matcher)s, name=~"%(replica_template_name)s"})' % { + namespace_matcher: $.namespaceMatcher(), + replica_template_name: $._config.autoscaling.ingester.replica_template_name, + }, + ]; + + local replicaTemplateLegends = [ + 'Template spec replicas', + 'Template status replicas', + ]; + + $.autoScalingActualReplicas('ingester', replicaTemplateQueries, replicaTemplateLegends) + { title: 'Replicas (HPA + ReplicaTemplate)' } + $.panelDescription( - 'Replicas (leader zone)', + 'Replicas (HPA + ReplicaTemplate)', ||| - The minimum, maximum, and current number of 
replicas for the leader zone of ingesters. - Other zones scale to follow this zone (with delay for downscale). + The minimum, maximum, and current number of replicas reported by the HPA for the ReplicaTemplate object. + If available, also the spec and status replicas fields for the ReplicaTemplate object itself. + Rollout-operator will keep ingester replicas updated based on the ReplicaTemplate spec field, and then update the template's status field once the ingester count changes. ||| ) ) .addPanel( - $.timeseriesPanel('Replicas') + - $.panelDescription('Replicas', 'Number of ingester replicas per zone.') + - $.queryPanel( - [ - 'sum by (%s) (up{%s})' % [$._config.per_job_label, $.jobMatcher($._config.job_names.ingester)], - ], - [ - '{{ %(per_job_label)s }}' % $._config.per_job_label, - ], - ), - ) - .addPanel( - $.autoScalingDesiredReplicasByValueScalingMetricPanel('ingester', '', '') + { title: 'Desired replicas (leader zone)' } - ) - .addPanel( - $.autoScalingFailuresPanel('ingester') + { title: 'Autoscaler failures rate' } - ), - ) - .addRowIf( - $._config.show_ingest_storage_panels && $._config.autoscaling.ingester.enabled, - $.row('Ingester – autoscaling (ingest storage)') - .addPanel( - $.autoScalingActualReplicas('ingester') + { title: 'Replicas (ReplicaTemplate)' } + + $.timeseriesPanel('Replicas (Ingesters)') + $.panelDescription( - 'Replicas (ReplicaTemplate)', + 'Replicas (Ingesters)', ||| - The minimum, maximum, and current number of replicas for the ReplicaTemplate object. - Rollout-operator will keep ingester replicas updated based on this object. + Number of up ingester replicas per zone. + Also show the number of read-only replicas per zone, or number of Inactive partitions for ingest storage. 
||| - ) - ) - .addPanel( - $.timeseriesPanel('Replicas') + - $.panelDescription('Replicas', 'Number of ingester replicas.') + - $.queryPanel( + ) + $.queryPanel( [ 'sum by (%s) (up{%s})' % [$._config.per_job_label, $.jobMatcher($._config.job_names.ingester)], + 'sum by (%s) (cortex_lifecycler_read_only{%s}) unless on (%s) (cortex_partition_ring_partitions{name="ingester-partitions"})' % [$._config.per_job_label, $.jobMatcher($._config.job_names.ingester), $._config.per_job_label], + 'max(cortex_partition_ring_partitions{%s,name="ingester-partitions",state="Inactive"})' % [$.namespaceMatcher()], ], [ - '{{ %(per_job_label)s }}' % $._config.per_job_label, + 'up ({{ %(per_job_label)s }})' % $._config.per_job_label, + 'read-only ({{ %(per_job_label)s }})' % $._config.per_job_label, + 'inactive partitions', ], ), )