Skip to content

Commit

Permalink
Include query fragments in alerts/template
Browse files Browse the repository at this point in the history
  • Loading branch information
prymitive committed Oct 29, 2024
1 parent 128fada commit 1331c38
Show file tree
Hide file tree
Showing 9 changed files with 395 additions and 148 deletions.
2 changes: 1 addition & 1 deletion cmd/pint/tests/0076_ci_group_errors.txt
Original file line number Diff line number Diff line change
Expand Up @@ -263,7 +263,7 @@ rules.yml:29-30 Bug: `summary` annotation is required. (alerts/annotation)
29 | annotations:
30 | instance: 'sum on {{ $labels.instance }} is {{ $value }}'

rules.yml:30 Bug: Template is using `instance` label but the query removes it. (alerts/template)
rules.yml:30 Bug: Template is using `instance` label but the query results won't have this label. Query is using aggregation with `by(foo)`, only labels included inside `by(...)` will be present on the results. (alerts/template)
30 | instance: 'sum on {{ $labels.instance }} is {{ $value }}'

rules.yml:32-33 Bug: `link` annotation is required. (alerts/annotation)
Expand Down
6 changes: 3 additions & 3 deletions cmd/pint/tests/0087_dedup.txt
Original file line number Diff line number Diff line change
Expand Up @@ -7,13 +7,13 @@ level=INFO msg="Finding all rules to check" paths=["rules"]
rules/01.yml:5 Warning: Alert query doesn't have any condition, it will always fire if the metric exists. (alerts/comparison)
5 | expr: sum(up{job="bar"}) / sum(foo) / sum(bar)

rules/01.yml:12 Bug: Template is using `cluster` label but the query removes it. (alerts/template)
rules/01.yml:12 Bug: Template is using `cluster` label but the query results won't have this label. Query is using aggregation that removes all labels. (alerts/template)
12 | summary: "Server {{ $labels.instance }} in cluster {{ $labels.cluster }} has gone down"

rules/01.yml:12 Bug: Template is using `instance` label but the query removes it. (alerts/template)
rules/01.yml:12 Bug: Template is using `instance` label but the query results won't have this label. Query is using aggregation that removes all labels. (alerts/template)
12 | summary: "Server {{ $labels.instance }} in cluster {{ $labels.cluster }} has gone down"

rules/01.yml:13 Bug: Template is using `cluster` label but the query removes it. (alerts/template)
rules/01.yml:13 Bug: Template is using `cluster` label but the query results won't have this label. Query is using aggregation that removes all labels. (alerts/template)
13 | dashboard: "https://grafana.example.com/dashboard?var-cluster={{ $labels.cluster }}&var-instance={{ $labels.cluster }}"

level=INFO msg="Problems found" Bug=3 Warning=1
Expand Down
7 changes: 6 additions & 1 deletion docs/changelog.md
Original file line number Diff line number Diff line change
Expand Up @@ -33,10 +33,15 @@
}
```

### Changed

- [alerts/template](checks/alerts/template.md) check was refactored and will now produce more accurate results.
Messages produced by this check might include details of the PromQL query fragment causing the problem
if the query is complex enough.

### Fixed

- Don't try to create GitLab comments on unmodified lines - [#1147](https://github.com/cloudflare/pint/pull/1147).
- [alerts/template](checks/alerts/template.md) check was refactored and will now produce more accurate results.
## v0.67.0
Expand Down
2 changes: 1 addition & 1 deletion internal/checks/alerts_absent.go
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ func (c AlertsAbsentCheck) Check(ctx context.Context, _ discovery.Path, rule par
}

var hasAbsent bool
src := utils.LabelsSource(rule.AlertingRule.Expr.Query)
src := utils.LabelsSource(rule.AlertingRule.Expr.Value.Value, rule.AlertingRule.Expr.Query)
for _, s := range append(src.Alternatives, src) {
if s.Operation == "absent" {
hasAbsent = true
Expand Down
37 changes: 20 additions & 17 deletions internal/checks/alerts_template.go
Original file line number Diff line number Diff line change
Expand Up @@ -115,7 +115,7 @@ func (c TemplateCheck) Check(ctx context.Context, _ discovery.Path, rule parser.
return nil
}

src := utils.LabelsSource(rule.AlertingRule.Expr.Query)
src := utils.LabelsSource(rule.AlertingRule.Expr.Value.Value, rule.AlertingRule.Expr.Query)
data := promTemplate.AlertTemplateData(map[string]string{}, map[string]string{}, "", promql.Sample{})

if rule.AlertingRule.Labels != nil {
Expand Down Expand Up @@ -144,7 +144,7 @@ func (c TemplateCheck) Check(ctx context.Context, _ discovery.Path, rule parser.
})
}

for _, problem := range checkQueryLabels(label.Key.Value, label.Value.Value, src) {
for _, problem := range checkQueryLabels(rule.AlertingRule.Expr.Value.Value, label.Key.Value, label.Value.Value, src) {
problems = append(problems, Problem{
Lines: parser.LineRange{
First: label.Key.Lines.First,
Expand Down Expand Up @@ -174,7 +174,7 @@ func (c TemplateCheck) Check(ctx context.Context, _ discovery.Path, rule parser.
})
}

for _, problem := range checkQueryLabels(annotation.Key.Value, annotation.Value.Value, src) {
for _, problem := range checkQueryLabels(rule.AlertingRule.Expr.Value.Value, annotation.Key.Value, annotation.Value.Value, src) {
problems = append(problems, Problem{
Lines: parser.LineRange{
First: annotation.Key.Lines.First,
Expand Down Expand Up @@ -436,7 +436,7 @@ func findTemplateVariables(name, text string) (vars [][]string, aliases aliasMap
return vars, aliases, true
}

func checkQueryLabels(labelName, labelValue string, src utils.Source) (problems []exprProblem) {
func checkQueryLabels(query, labelName, labelValue string, src utils.Source) (problems []exprProblem) {
vars, aliases, ok := findTemplateVariables(labelName, labelValue)
if !ok {
return nil
Expand All @@ -452,11 +452,11 @@ func checkQueryLabels(labelName, labelValue string, src utils.Source) (problems
}
for _, s := range append(src.Alternatives, src) {
if s.FixedLabels && !slices.Contains(s.IncludedLabels, v[1]) {
problems = append(problems, textForProblem(v[1], "", s, Bug))
problems = append(problems, textForProblem(query, v[1], "", s, Bug))
goto NEXT
}
if slices.Contains(s.ExcludedLabels, v[1]) {
problems = append(problems, textForProblem(v[1], v[1], s, Bug))
problems = append(problems, textForProblem(query, v[1], v[1], s, Bug))
goto NEXT
}
}
Expand All @@ -469,7 +469,7 @@ func checkQueryLabels(labelName, labelValue string, src utils.Source) (problems
return problems
}

func textForProblem(label, reasonLabel string, src utils.Source, severity Severity) exprProblem {
func textForProblem(query, label, reasonLabel string, src utils.Source, severity Severity) exprProblem {
switch {
case src.Operation == "absent":
return exprProblem{
Expand All @@ -489,23 +489,26 @@ func textForProblem(label, reasonLabel string, src utils.Source, severity Severi
details: TemplateCheckLabelsDetails,
severity: severity,
}
case slices.Contains([]string{
promParser.CardOneToOne.String(),
promParser.CardOneToMany.String(),
promParser.CardManyToMany.String(),
promParser.CardManyToOne.String(),
}, src.Operation):
case src.Operation == promParser.CardOneToOne.String():
return exprProblem{
text: fmt.Sprintf("Template is using `%s` label but the query results won't have this label. %s",
label, src.ExcludeReason[reasonLabel]),
details: TemplateCheckOnDetails,
label, src.ExcludeReason[reasonLabel].Reason),
details: maybeAddQueryFragment(query, src.ExcludeReason[reasonLabel].Fragment, TemplateCheckOnDetails),
severity: severity,
}
default:
return exprProblem{
text: fmt.Sprintf("Template is using `%s` label but the query removes it.", label),
details: TemplateCheckAggregationDetails,
text: fmt.Sprintf("Template is using `%s` label but the query results won't have this label. %s",
label, src.ExcludeReason[reasonLabel].Reason),
details: maybeAddQueryFragment(query, src.ExcludeReason[reasonLabel].Fragment, TemplateCheckAggregationDetails),
severity: severity,
}
}
}

// maybeAddQueryFragment returns msg, optionally extended with a line that
// quotes the PromQL fragment responsible for the reported problem.
//
// msg is returned unchanged when there is nothing useful to add:
//   - fragment is empty, meaning no fragment is known for this problem;
//     appending it would render an empty pair of backticks,
//   - fragment is identical to query, so quoting it would only repeat the
//     whole expression the problem is already reported against.
//
// Otherwise the result is msg, a newline, and the fragment wrapped in
// backticks.
func maybeAddQueryFragment(query, fragment, msg string) string {
	if fragment == "" || fragment == query {
		return msg
	}
	return fmt.Sprintf("%s\nQuery fragment causing this problem: `%s`.", msg, fragment)
}
52 changes: 24 additions & 28 deletions internal/checks/alerts_template_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -294,8 +294,8 @@ func TestTemplateCheck(t *testing.T) {
Last: 4,
},
Reporter: checks.TemplateCheckName,
Text: "Template is using `job` label but the query removes it.",
Details: checks.TemplateCheckAggregationDetails,
Text: "Template is using `job` label but the query results won't have this label. Query is using aggregation that removes all labels.",
Details: checks.TemplateCheckAggregationDetails + "\nQuery fragment causing this problem: `sum(foo)`.",
Severity: checks.Bug,
},
}
Expand All @@ -314,8 +314,8 @@ func TestTemplateCheck(t *testing.T) {
Last: 4,
},
Reporter: checks.TemplateCheckName,
Text: "Template is using `job` label but the query removes it.",
Details: checks.TemplateCheckAggregationDetails,
Text: "Template is using `job` label but the query results won't have this label. Query is using aggregation that removes all labels.",
Details: checks.TemplateCheckAggregationDetails + "\nQuery fragment causing this problem: `sum(foo)`.",
Severity: checks.Bug,
},
}
Expand All @@ -334,8 +334,8 @@ func TestTemplateCheck(t *testing.T) {
Last: 4,
},
Reporter: checks.TemplateCheckName,
Text: "Template is using `job` label but the query removes it.",
Details: checks.TemplateCheckAggregationDetails,
Text: "Template is using `job` label but the query results won't have this label. Query is using aggregation with `without(job)`, all labels included inside `without(...)` will be removed from the results.",
Details: checks.TemplateCheckAggregationDetails + "\nQuery fragment causing this problem: `sum(foo) without(job)`.",
Severity: checks.Bug,
},
}
Expand All @@ -354,8 +354,8 @@ func TestTemplateCheck(t *testing.T) {
Last: 4,
},
Reporter: checks.TemplateCheckName,
Text: "Template is using `job` label but the query removes it.",
Details: checks.TemplateCheckAggregationDetails,
Text: "Template is using `job` label but the query results won't have this label. Query is using aggregation with `without(job)`, all labels included inside `without(...)` will be removed from the results.",
Details: checks.TemplateCheckAggregationDetails + "\nQuery fragment causing this problem: `sum(foo) without(job)`.",
Severity: checks.Bug,
},
}
Expand All @@ -374,8 +374,8 @@ func TestTemplateCheck(t *testing.T) {
Last: 4,
},
Reporter: checks.TemplateCheckName,
Text: "Template is using `job` label but the query removes it.",
Details: checks.TemplateCheckAggregationDetails,
Text: "Template is using `job` label but the query results won't have this label. Query is using aggregation with `without(job)`, all labels included inside `without(...)` will be removed from the results.",
Details: checks.TemplateCheckAggregationDetails + "\nQuery fragment causing this problem: `sum(foo) without(job)`.",
Severity: checks.Bug,
},
}
Expand All @@ -394,8 +394,8 @@ func TestTemplateCheck(t *testing.T) {
Last: 4,
},
Reporter: checks.TemplateCheckName,
Text: "Template is using `job` label but the query removes it.",
Details: checks.TemplateCheckAggregationDetails,
Text: "Template is using `job` label but the query results won't have this label. Query is using aggregation that removes all labels.",
Details: checks.TemplateCheckAggregationDetails + "\nQuery fragment causing this problem: `sum(bar)`.",
Severity: checks.Bug,
},
}
Expand All @@ -414,8 +414,8 @@ func TestTemplateCheck(t *testing.T) {
Last: 4,
},
Reporter: checks.TemplateCheckName,
Text: "Template is using `job` label but the query removes it.",
Details: checks.TemplateCheckAggregationDetails,
Text: "Template is using `job` label but the query results won't have this label. Query is using aggregation with `by(notjob)`, only labels included inside `by(...)` will be present on the results.",
Details: checks.TemplateCheckAggregationDetails + "\nQuery fragment causing this problem: `sum(foo) by(notjob)`.",
Severity: checks.Bug,
},
}
Expand All @@ -440,8 +440,8 @@ func TestTemplateCheck(t *testing.T) {
Last: 6,
},
Reporter: checks.TemplateCheckName,
Text: "Template is using `ixtance` label but the query removes it.",
Details: checks.TemplateCheckAggregationDetails,
Text: "Template is using `ixtance` label but the query results won't have this label. Query is using aggregation with `by(instance, version)`, only labels included inside `by(...)` will be present on the results.",
Details: checks.TemplateCheckAggregationDetails + "\nQuery fragment causing this problem: `count(build_info) by (instance, version)`.",
Severity: checks.Bug,
},
}
Expand Down Expand Up @@ -1312,14 +1312,12 @@ func TestTemplateCheck(t *testing.T) {
{
description: "multiple or",
content: `
- alert: Prefix_Advertised_On_Very_Few_Routers
- alert: Foo
expr: >
avg without(router, colo_id, instance) (router_anycast_prefix_enabled{cidr_use_case!~".*offpeak.*|.*multicolo.*|.*aggregate.*|.*test.*|.*tier1.*|.*regional.*|.*brat.*|.*utopia.*|.*byoip.*",prefix!~"141.101.112.0/20|190.93.240.0/20"})
avg without(router, colo_id, instance) (router_anycast_prefix_enabled{cidr_use_case!~".*offpeak.*"})
< 0.5 > 0
or avg without(router, colo_id, instance) (router_anycast_prefix_enabled{cidr_use_case=~".*multicolo.*"})
< 0.4 > 0
or sum without(router, colo_id, instance) (router_anycast_prefix_enabled{cidr_use_case=~".*aggregate.*"} OR router_anycast_prefix_enabled{prefix=~"141.101.112.0/20|190.93.240.0/20"})
< 20 > 0
or sum without(router, colo_id, instance) (router_anycast_prefix_enabled{cidr_use_case=~".*offpeak.*"})
< 8 > 0
or sum without(router, colo_id, instance) (router_anycast_prefix_enabled{cidr_use_case=~".*tier1.*"})
Expand All @@ -1340,14 +1338,12 @@ func TestTemplateCheck(t *testing.T) {
{
description: "multiple or / missing group_left()",
content: `
- alert: Prefix_Advertised_On_Very_Few_Routers
- alert: Foo
expr: >
avg without(router, colo_id, instance) (router_anycast_prefix_enabled{cidr_use_case!~".*offpeak.*|.*multicolo.*|.*aggregate.*|.*test.*|.*tier1.*|.*regional.*|.*brat.*|.*utopia.*|.*byoip.*",prefix!~"141.101.112.0/20|190.93.240.0/20"})
avg without(router, colo_id, instance) (router_anycast_prefix_enabled{cidr_use_case!~".*offpeak.*"})
< 0.5 > 0
or avg without(router, colo_id, instance) (router_anycast_prefix_enabled{cidr_use_case=~".*multicolo.*"})
< 0.4 > 0
or sum without(router, colo_id, instance) (router_anycast_prefix_enabled{cidr_use_case=~".*aggregate.*"} OR router_anycast_prefix_enabled{prefix=~"141.101.112.0/20|190.93.240.0/20"})
< 20 > 0
or sum without(router, colo_id, instance) (router_anycast_prefix_enabled{cidr_use_case=~".*offpeak.*"})
< 8 > 0
or sum without(router, colo_id, instance) (router_anycast_prefix_enabled{cidr_use_case=~".*tier1.*"})
Expand All @@ -1367,12 +1363,12 @@ func TestTemplateCheck(t *testing.T) {
return []checks.Problem{
{
Lines: parser.LineRange{
First: 21,
Last: 21,
First: 19,
Last: 19,
},
Reporter: checks.TemplateCheckName,
Text: "Template is using `prefix` label but the query removes it.",
Details: checks.TemplateCheckAggregationDetails,
Text: "Template is using `prefix` label but the query results won't have this label. Query is using one-to-one vector matching with `on()`, only labels included inside `on(...)` will be present on the results.",
Details: checks.TemplateCheckAggregationDetails + "\nQuery fragment causing this problem: `sum without(router, colo_id, instance) (router_anycast_prefix_enabled{cidr_use_case=~\".*tier1.*\"}) < on() count(colo_router_tier:disabled_pops:max{tier=\"1\",router=~\"edge.*\"}) * 0.4`.",
Severity: checks.Bug,
},
}
Expand Down
6 changes: 3 additions & 3 deletions internal/checks/promql_fragile.go
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,7 @@ func (c FragileCheck) Check(_ context.Context, _ discovery.Path, rule parser.Rul
}

if rule.AlertingRule != nil {
for _, problem := range c.checkSampling(expr.Query) {
for _, problem := range c.checkSampling(expr.Value.Value, expr.Query) {
problems = append(problems, Problem{
Lines: expr.Value.Lines,
Reporter: c.Reporter(),
Expand Down Expand Up @@ -126,8 +126,8 @@ NEXT:
return problems
}

func (c FragileCheck) checkSampling(node *parser.PromQLNode) (problems []exprProblem) {
s := utils.LabelsSource(node)
func (c FragileCheck) checkSampling(expr string, node *parser.PromQLNode) (problems []exprProblem) {
s := utils.LabelsSource(expr, node)
for _, src := range append(s.Alternatives, s) {
if src.Type != utils.AggregateSource {
continue
Expand Down
Loading

0 comments on commit 1331c38

Please sign in to comment.