Skip to content

Commit

Permalink
Fix: YAML indentation (#520)
Browse files (browse the repository at this point in the history)
Signed-off-by: Nicolas Lamirault <[email protected]>
  • Loading branch information
nlamirault authored Sep 4, 2023
1 parent fbda2b4 commit 93f8231
Show file tree
Hide file tree
Showing 3 changed files with 159 additions and 159 deletions.
6 changes: 3 additions & 3 deletions charts/tempo-mixin/Chart.yaml
Original file line number | Diff line number | Diff line change
Expand Up @@ -27,7 +27,7 @@ keywords:
- tempo
- monitoring-mixin
- portefaix
version: 1.4.0
version: 1.4.1
appVersion: 2.0.0

maintainers:
Expand All @@ -51,5 +51,5 @@ annotations:
fingerprint: C39918B3EBDE35C23B8D0B8E5F99269A6FCA437C
url: https://keybase.io/nlamirault/pgp_keys.asc
artifacthub.io/changes: |
- kind: changed
description: Includes additionalLabels and additionalAnnotations on configmaps
- kind: fixed
description: YAML indentation
284 changes: 142 additions & 142 deletions charts/tempo-mixin/templates/alerts.yaml
Original file line number | Diff line number | Diff line change
Expand Up @@ -13,145 +13,145 @@ metadata:
{{- end }}
spec:
groups:
- name: tempo_alerts
rules:
- alert: TempoRequestLatency
annotations:
message: |
{{`{{`}} $labels.job {{`}}`}} {{`{{`}} $labels.route {{`}}`}} is experiencing {{`{{`}} printf "%.2f" $value {{`}}`}}s 99th percentile latency.
runbook_url: https://github.com/grafana/tempo/tree/main/operations/tempo-mixin/runbook.md#TempoRequestLatency
expr: |
cluster_namespace_job_route:tempo_request_duration_seconds:99quantile{route!~"metrics|/frontend.Frontend/Process|debug_pprof"} > 3
for: 15m
labels:
severity: critical
- alert: TempoCompactorUnhealthy
annotations:
message: There are {{`{{`}} printf "%f" $value {{`}}`}} unhealthy compactor(s).
runbook_url: https://github.com/grafana/tempo/tree/main/operations/tempo-mixin/runbook.md#TempoCompactorUnhealthy
expr: |
max by (cluster, namespace) (cortex_ring_members{state="Unhealthy", name="compactor", namespace=~".*"}) > 0
for: 15m
labels:
severity: critical
- alert: TempoDistributorUnhealthy
annotations:
message: There are {{`{{`}} printf "%f" $value {{`}}`}} unhealthy distributor(s).
runbook_url: https://github.com/grafana/tempo/tree/main/operations/tempo-mixin/runbook.md#TempoDistributorUnhealthy
expr: |
max by (cluster, namespace) (cortex_ring_members{state="Unhealthy", name="distributor", namespace=~".*"}) > 0
for: 15m
labels:
severity: warning
- alert: TempoCompactionsFailing
annotations:
message: Greater than 2 compactions have failed in the past hour.
runbook_url: https://github.com/grafana/tempo/tree/main/operations/tempo-mixin/runbook.md#TempoCompactionsFailing
expr: |
sum by (cluster, namespace) (increase(tempodb_compaction_errors_total{}[1h])) > 2 and
sum by (cluster, namespace) (increase(tempodb_compaction_errors_total{}[5m])) > 0
for: 5m
labels:
severity: critical
- alert: TempoIngesterFlushesUnhealthy
annotations:
message: Greater than 2 flush retries have occurred in the past hour.
runbook_url: https://github.com/grafana/tempo/tree/main/operations/tempo-mixin/runbook.md#TempoIngesterFlushesFailing
expr: |
sum by (cluster, namespace) (increase(tempo_ingester_failed_flushes_total{}[1h])) > 2 and
sum by (cluster, namespace) (increase(tempo_ingester_failed_flushes_total{}[5m])) > 0
for: 5m
labels:
severity: warning
- alert: TempoIngesterFlushesFailing
annotations:
message: Greater than 2 flush retries have failed in the past hour.
runbook_url: https://github.com/grafana/tempo/tree/main/operations/tempo-mixin/runbook.md#TempoIngesterFlushesFailing
expr: |
sum by (cluster, namespace) (increase(tempo_ingester_flush_failed_retries_total{}[1h])) > 2 and
sum by (cluster, namespace) (increase(tempo_ingester_flush_failed_retries_total{}[5m])) > 0
for: 5m
labels:
severity: critical
- alert: TempoPollsFailing
annotations:
message: Greater than 2 polls have failed in the past hour.
runbook_url: https://github.com/grafana/tempo/tree/main/operations/tempo-mixin/runbook.md#TempoPollsFailing
expr: |
sum by (cluster, namespace) (increase(tempodb_blocklist_poll_errors_total{}[1h])) > 2 and
sum by (cluster, namespace) (increase(tempodb_blocklist_poll_errors_total{}[5m])) > 0
labels:
severity: critical
- alert: TempoTenantIndexFailures
annotations:
message: Greater than 2 tenant index failures in the past hour.
runbook_url: https://github.com/grafana/tempo/tree/main/operations/tempo-mixin/runbook.md#TempoTenantIndexFailures
expr: |
sum by (cluster, namespace) (increase(tempodb_blocklist_tenant_index_errors_total{}[1h])) > 2 and
sum by (cluster, namespace) (increase(tempodb_blocklist_tenant_index_errors_total{}[5m])) > 0
labels:
severity: critical
- alert: TempoNoTenantIndexBuilders
annotations:
message: No tenant index builders for tenant {{`{{`}} $labels.tenant {{`}}`}}. Tenant index will quickly become stale.
runbook_url: https://github.com/grafana/tempo/tree/main/operations/tempo-mixin/runbook.md#TempoNoTenantIndexBuilders
expr: |
sum by (cluster, namespace, tenant) (tempodb_blocklist_tenant_index_builder{}) == 0 and
max by (cluster, namespace) (tempodb_blocklist_length{}) > 0
for: 5m
labels:
severity: critical
- alert: TempoTenantIndexTooOld
annotations:
message: Tenant index age is 600 seconds old for tenant {{`{{`}} $labels.tenant {{`}}`}}.
runbook_url: https://github.com/grafana/tempo/tree/main/operations/tempo-mixin/runbook.md#TempoTenantIndexTooOld
expr: |
max by (cluster, namespace, tenant) (tempodb_blocklist_tenant_index_age_seconds{}) > 600
for: 5m
labels:
severity: critical
- alert: TempoBadOverrides
annotations:
message: '{{`{{`}} $labels.job {{`}}`}} failed to reload overrides.'
runbook_url: https://github.com/grafana/tempo/tree/main/operations/tempo-mixin/runbook.md#TempoBadOverrides
expr: |
sum(tempo_runtime_config_last_reload_successful{namespace=~".*"} == 0) by (cluster, namespace, job)
for: 15m
labels:
severity: warning
- alert: TempoProvisioningTooManyWrites
annotations:
message: Ingesters in {{`{{`}} $labels.cluster {{`}}`}}/{{`{{`}} $labels.namespace {{`}}`}} are receiving more data/second than desired, add more ingesters.
runbook_url: https://github.com/grafana/tempo/tree/main/operations/tempo-mixin/runbook.md#TempoProvisioningTooManyWrites
expr: |
avg by (cluster, namespace) (rate(tempo_ingester_bytes_received_total{job=~".+/ingester"}[1m])) / 1024 / 1024 > 30
for: 15m
labels:
severity: warning
- alert: TempoCompactorsTooManyOutstandingBlocks
annotations:
message: There are too many outstanding compaction blocks in {{`{{`}} $labels.cluster {{`}}`}}/{{`{{`}} $labels.namespace {{`}}`}} for tenant {{`{{`}} $labels.tenant {{`}}`}}, increase compactor's CPU or add more compactors.
runbook_url: https://github.com/grafana/tempo/tree/main/operations/tempo-mixin/runbook.md#TempoCompactorsTooManyOutstandingBlocks
expr: |
sum by (cluster, namespace, tenant) (tempodb_compaction_outstanding_blocks{container="compactor", namespace=~".*"}) / ignoring(tenant) group_left count(tempo_build_info{container="compactor", namespace=~".*"}) by (cluster, namespace) > 100
for: 6h
labels:
severity: warning
- alert: TempoCompactorsTooManyOutstandingBlocks
annotations:
message: There are too many outstanding compaction blocks in {{`{{`}} $labels.cluster {{`}}`}}/{{`{{`}} $labels.namespace {{`}}`}} for tenant {{`{{`}} $labels.tenant {{`}}`}}, increase compactor's CPU or add more compactors.
runbook_url: https://github.com/grafana/tempo/tree/main/operations/tempo-mixin/runbook.md#TempoCompactorsTooManyOutstandingBlocks
expr: |
sum by (cluster, namespace, tenant) (tempodb_compaction_outstanding_blocks{container="compactor", namespace=~".*"}) / ignoring(tenant) group_left count(tempo_build_info{container="compactor", namespace=~".*"}) by (cluster, namespace) > 250
for: 24h
labels:
severity: critical
- alert: TempoIngesterReplayErrors
annotations:
message: Tempo ingester has encountered errors while replaying a block on startup in {{`{{`}} $labels.cluster {{`}}`}}/{{`{{`}} $labels.namespace {{`}}`}} for tenant {{`{{`}} $labels.tenant {{`}}`}}
runbook_url: https://github.com/grafana/tempo/tree/main/operations/tempo-mixin/runbook.md#TempoIngesterReplayErrors
expr: |
sum by (cluster, namespace, tenant) (increase(tempo_ingester_replay_errors_total{namespace=~".*"}[5m])) > 0
for: 5m
labels:
severity: critical
- name: tempo_alerts
rules:
- alert: TempoRequestLatency
annotations:
message: |
{{`{{`}} $labels.job {{`}}`}} {{`{{`}} $labels.route {{`}}`}} is experiencing {{`{{`}} printf "%.2f" $value {{`}}`}}s 99th percentile latency.
runbook_url: https://github.com/grafana/tempo/tree/main/operations/tempo-mixin/runbook.md#TempoRequestLatency
expr: |
cluster_namespace_job_route:tempo_request_duration_seconds:99quantile{route!~"metrics|/frontend.Frontend/Process|debug_pprof"} > 3
for: 15m
labels:
severity: critical
- alert: TempoCompactorUnhealthy
annotations:
message: There are {{`{{`}} printf "%f" $value {{`}}`}} unhealthy compactor(s).
runbook_url: https://github.com/grafana/tempo/tree/main/operations/tempo-mixin/runbook.md#TempoCompactorUnhealthy
expr: |
max by (cluster, namespace) (cortex_ring_members{state="Unhealthy", name="compactor", namespace=~".*"}) > 0
for: 15m
labels:
severity: critical
- alert: TempoDistributorUnhealthy
annotations:
message: There are {{`{{`}} printf "%f" $value {{`}}`}} unhealthy distributor(s).
runbook_url: https://github.com/grafana/tempo/tree/main/operations/tempo-mixin/runbook.md#TempoDistributorUnhealthy
expr: |
max by (cluster, namespace) (cortex_ring_members{state="Unhealthy", name="distributor", namespace=~".*"}) > 0
for: 15m
labels:
severity: warning
- alert: TempoCompactionsFailing
annotations:
message: Greater than 2 compactions have failed in the past hour.
runbook_url: https://github.com/grafana/tempo/tree/main/operations/tempo-mixin/runbook.md#TempoCompactionsFailing
expr: |
sum by (cluster, namespace) (increase(tempodb_compaction_errors_total{}[1h])) > 2 and
sum by (cluster, namespace) (increase(tempodb_compaction_errors_total{}[5m])) > 0
for: 5m
labels:
severity: critical
- alert: TempoIngesterFlushesUnhealthy
annotations:
message: Greater than 2 flush retries have occurred in the past hour.
runbook_url: https://github.com/grafana/tempo/tree/main/operations/tempo-mixin/runbook.md#TempoIngesterFlushesFailing
expr: |
sum by (cluster, namespace) (increase(tempo_ingester_failed_flushes_total{}[1h])) > 2 and
sum by (cluster, namespace) (increase(tempo_ingester_failed_flushes_total{}[5m])) > 0
for: 5m
labels:
severity: warning
- alert: TempoIngesterFlushesFailing
annotations:
message: Greater than 2 flush retries have failed in the past hour.
runbook_url: https://github.com/grafana/tempo/tree/main/operations/tempo-mixin/runbook.md#TempoIngesterFlushesFailing
expr: |
sum by (cluster, namespace) (increase(tempo_ingester_flush_failed_retries_total{}[1h])) > 2 and
sum by (cluster, namespace) (increase(tempo_ingester_flush_failed_retries_total{}[5m])) > 0
for: 5m
labels:
severity: critical
- alert: TempoPollsFailing
annotations:
message: Greater than 2 polls have failed in the past hour.
runbook_url: https://github.com/grafana/tempo/tree/main/operations/tempo-mixin/runbook.md#TempoPollsFailing
expr: |
sum by (cluster, namespace) (increase(tempodb_blocklist_poll_errors_total{}[1h])) > 2 and
sum by (cluster, namespace) (increase(tempodb_blocklist_poll_errors_total{}[5m])) > 0
labels:
severity: critical
- alert: TempoTenantIndexFailures
annotations:
message: Greater than 2 tenant index failures in the past hour.
runbook_url: https://github.com/grafana/tempo/tree/main/operations/tempo-mixin/runbook.md#TempoTenantIndexFailures
expr: |
sum by (cluster, namespace) (increase(tempodb_blocklist_tenant_index_errors_total{}[1h])) > 2 and
sum by (cluster, namespace) (increase(tempodb_blocklist_tenant_index_errors_total{}[5m])) > 0
labels:
severity: critical
- alert: TempoNoTenantIndexBuilders
annotations:
message: No tenant index builders for tenant {{`{{`}} $labels.tenant {{`}}`}}. Tenant index will quickly become stale.
runbook_url: https://github.com/grafana/tempo/tree/main/operations/tempo-mixin/runbook.md#TempoNoTenantIndexBuilders
expr: |
sum by (cluster, namespace, tenant) (tempodb_blocklist_tenant_index_builder{}) == 0 and
max by (cluster, namespace) (tempodb_blocklist_length{}) > 0
for: 5m
labels:
severity: critical
- alert: TempoTenantIndexTooOld
annotations:
message: Tenant index age is 600 seconds old for tenant {{`{{`}} $labels.tenant {{`}}`}}.
runbook_url: https://github.com/grafana/tempo/tree/main/operations/tempo-mixin/runbook.md#TempoTenantIndexTooOld
expr: |
max by (cluster, namespace, tenant) (tempodb_blocklist_tenant_index_age_seconds{}) > 600
for: 5m
labels:
severity: critical
- alert: TempoBadOverrides
annotations:
message: '{{`{{`}} $labels.job {{`}}`}} failed to reload overrides.'
runbook_url: https://github.com/grafana/tempo/tree/main/operations/tempo-mixin/runbook.md#TempoBadOverrides
expr: |
sum(tempo_runtime_config_last_reload_successful{namespace=~".*"} == 0) by (cluster, namespace, job)
for: 15m
labels:
severity: warning
- alert: TempoProvisioningTooManyWrites
annotations:
message: Ingesters in {{`{{`}} $labels.cluster {{`}}`}}/{{`{{`}} $labels.namespace {{`}}`}} are receiving more data/second than desired, add more ingesters.
runbook_url: https://github.com/grafana/tempo/tree/main/operations/tempo-mixin/runbook.md#TempoProvisioningTooManyWrites
expr: |
avg by (cluster, namespace) (rate(tempo_ingester_bytes_received_total{job=~".+/ingester"}[1m])) / 1024 / 1024 > 30
for: 15m
labels:
severity: warning
- alert: TempoCompactorsTooManyOutstandingBlocks
annotations:
message: There are too many outstanding compaction blocks in {{`{{`}} $labels.cluster {{`}}`}}/{{`{{`}} $labels.namespace {{`}}`}} for tenant {{`{{`}} $labels.tenant {{`}}`}}, increase compactor's CPU or add more compactors.
runbook_url: https://github.com/grafana/tempo/tree/main/operations/tempo-mixin/runbook.md#TempoCompactorsTooManyOutstandingBlocks
expr: |
sum by (cluster, namespace, tenant) (tempodb_compaction_outstanding_blocks{container="compactor", namespace=~".*"}) / ignoring(tenant) group_left count(tempo_build_info{container="compactor", namespace=~".*"}) by (cluster, namespace) > 100
for: 6h
labels:
severity: warning
- alert: TempoCompactorsTooManyOutstandingBlocks
annotations:
message: There are too many outstanding compaction blocks in {{`{{`}} $labels.cluster {{`}}`}}/{{`{{`}} $labels.namespace {{`}}`}} for tenant {{`{{`}} $labels.tenant {{`}}`}}, increase compactor's CPU or add more compactors.
runbook_url: https://github.com/grafana/tempo/tree/main/operations/tempo-mixin/runbook.md#TempoCompactorsTooManyOutstandingBlocks
expr: |
sum by (cluster, namespace, tenant) (tempodb_compaction_outstanding_blocks{container="compactor", namespace=~".*"}) / ignoring(tenant) group_left count(tempo_build_info{container="compactor", namespace=~".*"}) by (cluster, namespace) > 250
for: 24h
labels:
severity: critical
- alert: TempoIngesterReplayErrors
annotations:
message: Tempo ingester has encountered errors while replaying a block on startup in {{`{{`}} $labels.cluster {{`}}`}}/{{`{{`}} $labels.namespace {{`}}`}} for tenant {{`{{`}} $labels.tenant {{`}}`}}
runbook_url: https://github.com/grafana/tempo/tree/main/operations/tempo-mixin/runbook.md#TempoIngesterReplayErrors
expr: |
sum by (cluster, namespace, tenant) (increase(tempo_ingester_replay_errors_total{namespace=~".*"}[5m])) > 0
for: 5m
labels:
severity: critical
Loading

0 comments on commit 93f8231

Please sign in to comment.