-
Notifications
You must be signed in to change notification settings - Fork 53
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #1046 from cloudflare/alerts/absent
Added alerts/absent check
- Loading branch information
Showing
16 changed files
with
716 additions
and
194 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,94 @@ | ||
--- | ||
layout: default | ||
parent: Checks | ||
grand_parent: Documentation | ||
--- | ||
|
||
# alerts/absent | ||
|
||
This check will warn you about alerting rules that are using `absent()` calls without having `for` option set | ||
to at least 2x scrape interval. | ||
Using `absent()` without `for` can cause false positive alerts when Prometheus is restarted and the rule | ||
is evaluated before the metrics tested using `absent()` are scraped. Adding a `for` option with at least | ||
2x scrape interval is usually enough to prevent this from happening. | ||
|
||
## Configuration | ||
|
||
This check doesn't have any configuration options. | ||
|
||
## How to enable it | ||
|
||
This check is enabled by default for all configured Prometheus servers. | ||
|
||
Example: | ||
|
||
```js | ||
prometheus "prod" { | ||
uri = "https://prometheus-prod.example.com" | ||
timeout = "60s" | ||
include = [ | ||
"rules/prod/.*", | ||
"rules/common/.*", | ||
] | ||
} | ||
|
||
prometheus "dev" { | ||
uri = "https://prometheus-dev.example.com" | ||
timeout = "30s" | ||
include = [ | ||
"rules/dev/.*", | ||
"rules/common/.*", | ||
] | ||
} | ||
``` | ||
|
||
## How to disable it | ||
|
||
You can disable this check globally by adding this config block: | ||
|
||
```js | ||
checks { | ||
disabled = ["alerts/absent"] | ||
} | ||
``` | ||
|
||
You can also disable it for all rules inside a given file by adding | ||
a comment anywhere in that file. Example: | ||
|
||
```yaml | ||
# pint file/disable alerts/absent | ||
``` | ||
|
||
Or you can disable it per rule by adding a comment to it. Example: | ||
|
||
```yaml | ||
# pint disable alerts/absent | ||
``` | ||
|
||
If you want to disable only individual instances of this check | ||
you can add a more specific comment. | ||
|
||
```yaml | ||
# pint disable alerts/absent($prometheus) | ||
``` | ||
|
||
Where `$prometheus` is the name of Prometheus server to disable. | ||
|
||
Example: | ||
|
||
```yaml | ||
# pint disable alerts/absent(prod) | ||
``` | ||
|
||
## How to snooze it | ||
|
||
You can disable this check until a given time by adding a comment to it. Example: | ||
|
||
```yaml | ||
# pint snooze $TIMESTAMP alerts/absent | ||
``` | ||
|
||
Where `$TIMESTAMP` is either an [RFC3339](https://www.rfc-editor.org/rfc/rfc3339) | ||
formatted timestamp or a `YYYY-MM-DD` date. | ||
Adding this comment will disable `alerts/absent` *until* `$TIMESTAMP`, after that | ||
the check will be re-enabled. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,99 @@ | ||
package checks | ||
|
||
import ( | ||
"context" | ||
"fmt" | ||
"time" | ||
|
||
"github.com/cloudflare/pint/internal/discovery" | ||
"github.com/cloudflare/pint/internal/output" | ||
"github.com/cloudflare/pint/internal/parser" | ||
"github.com/cloudflare/pint/internal/promapi" | ||
|
||
"github.com/prometheus/common/model" | ||
promParser "github.com/prometheus/prometheus/promql/parser" | ||
) | ||
|
||
const (
	// AlertsAbsentCheckName is the identifier of this check. It is what
	// Reporter returns and what String uses as the prefix of the per-server
	// check name.
	AlertsAbsentCheckName = "alerts/absent"

	// AlertsAbsentCheckDetails is the longer explanation attached to the
	// problem reported by Check when an absent() alert has no sufficient `for`.
	AlertsAbsentCheckDetails = "When Prometheus restarts this alert rule might be evaluated before your service is scraped, which can cause false positives from absent() call.\nAdding `for` option that is at least 2x scrape interval will prevent this from happening."
)
|
||
func NewAlertsAbsentCheck(prom *promapi.FailoverGroup) AlertsAbsentCheck { | ||
return AlertsAbsentCheck{ | ||
prom: prom, | ||
} | ||
} | ||
|
||
type AlertsAbsentCheck struct { | ||
prom *promapi.FailoverGroup | ||
} | ||
|
||
func (c AlertsAbsentCheck) Meta() CheckMeta { | ||
return CheckMeta{ | ||
States: []discovery.ChangeType{ | ||
discovery.Noop, | ||
discovery.Added, | ||
discovery.Modified, | ||
discovery.Moved, | ||
}, | ||
IsOnline: true, | ||
} | ||
} | ||
|
||
func (c AlertsAbsentCheck) String() string { | ||
return fmt.Sprintf("%s(%s)", AlertsAbsentCheckName, c.prom.Name()) | ||
} | ||
|
||
func (c AlertsAbsentCheck) Reporter() string { | ||
return AlertsAbsentCheckName | ||
} | ||
|
||
func (c AlertsAbsentCheck) Check(ctx context.Context, _ discovery.Path, rule parser.Rule, _ []discovery.Entry) (problems []Problem) { | ||
if rule.AlertingRule == nil { | ||
return problems | ||
} | ||
|
||
if rule.AlertingRule.Expr.SyntaxError != nil { | ||
return problems | ||
} | ||
|
||
if n, ok := rule.AlertingRule.Expr.Query.Expr.(*promParser.Call); !ok || n.Func.Name != "absent" { | ||
return problems | ||
} | ||
|
||
cfg, err := c.prom.Config(ctx, 0) | ||
if err != nil { | ||
text, severity := textAndSeverityFromError(err, c.Reporter(), c.prom.Name(), Warning) | ||
problems = append(problems, Problem{ | ||
Lines: rule.AlertingRule.Expr.Value.Lines, | ||
Reporter: c.Reporter(), | ||
Text: text, | ||
Severity: severity, | ||
}) | ||
return problems | ||
} | ||
|
||
if rule.AlertingRule.For != nil { | ||
forDur, err := model.ParseDuration(rule.AlertingRule.For.Value) | ||
if err != nil { | ||
return problems | ||
} | ||
if time.Duration(forDur) >= cfg.Config.Global.ScrapeInterval*2 { | ||
return problems | ||
} | ||
} | ||
|
||
problems = append(problems, Problem{ | ||
Lines: rule.AlertingRule.Expr.Value.Lines, | ||
Reporter: c.Reporter(), | ||
Text: fmt.Sprintf("Alert query is using absent() which might cause false positives when %s restarts, please add `for: %s` to avoid this.", | ||
promText(c.prom.Name(), cfg.URI), | ||
output.HumanizeDuration((cfg.Config.Global.ScrapeInterval * 2).Round(time.Minute)), | ||
), | ||
Details: AlertsAbsentCheckDetails, | ||
Severity: Warning, | ||
}) | ||
|
||
return problems | ||
} |
Oops, something went wrong.