Skip to content

Commit

Permalink
Remove ruler.drain-notification-queue-on-shutdown option, and enable it by default (#9115)
Browse files Browse the repository at this point in the history

* Remove -ruler.drain-notification-queue-on-shutdown option completely.

Signed-off-by: Peter Štibraný <[email protected]>

---------

Signed-off-by: Peter Štibraný <[email protected]>
  • Loading branch information
pstibrany authored Aug 28, 2024
1 parent ba81e66 commit fdb1225
Show file tree
Hide file tree
Showing 7 changed files with 6 additions and 27 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
* [CHANGE] Distributor: Replace `-distributor.retry-after-header.max-backoff-exponent` and `-distributor.retry-after-header.base-seconds` with `-distributor.retry-after-header.min-backoff` and `-distributor.retry-after-header.max-backoff` for easier configuration. #8694
* [CHANGE] Ingester: increase the default inactivity timeout of active series (`-ingester.active-series-metrics-idle-timeout`) from `10m` to `20m`. #8975
* [CHANGE] Distributor: Remove `-distributor.enable-otlp-metadata-storage` flag, which was deprecated in version 2.12. #9069
* [CHANGE] Ruler: Remove `-ruler.drain-notification-queue-on-shutdown` option. The ruler now always drains outstanding alert notifications when shutting down. #9115
* [FEATURE] Querier: add experimental streaming PromQL engine, enabled with `-querier.query-engine=mimir`. #8422 #8430 #8454 #8455 #8360 #8490 #8508 #8577 #8660 #8671 #8677 #8747 #8850 #8872 #8838 #8911 #8909 #8923 #8924 #8925 #8932 #8933 #8934 #8962 #8986 #8993 #8995 #9017 #9018 #9008 #9120 #9121
* [FEATURE] Experimental Kafka-based ingest storage. #6888 #6894 #6929 #6940 #6951 #6974 #6982 #7029 #7030 #7091 #7142 #7147 #7148 #7153 #7160 #7193 #7349 #7376 #7388 #7391 #7393 #7394 #7402 #7404 #7423 #7424 #7437 #7486 #7503 #7508 #7540 #7621 #7682 #7685 #7694 #7695 #7696 #7697 #7701 #7733 #7734 #7741 #7752 #7838 #7851 #7871 #7877 #7880 #7882 #7887 #7891 #7925 #7955 #7967 #8031 #8063 #8077 #8088 #8135 #8176 #8184 #8194 #8216 #8217 #8222 #8233 #8503 #8542 #8579 #8657 #8686 #8688 #8703 #8706 #8708 #8738 #8750 #8778 #8808 #8809 #8841 #8842 #8845 #8853 #8886 #8988
* What it is:
Expand Down
11 changes: 0 additions & 11 deletions cmd/mimir/config-descriptor.json
Original file line number Diff line number Diff line change
Expand Up @@ -11804,17 +11804,6 @@
"fieldValue": null,
"fieldDefaultValue": null
},
{
"kind": "field",
"name": "drain_notification_queue_on_shutdown",
"required": false,
"desc": "Drain all outstanding alert notifications when shutting down. If false, any outstanding alert notifications are dropped when shutting down.",
"fieldValue": null,
"fieldDefaultValue": false,
"fieldFlag": "ruler.drain-notification-queue-on-shutdown",
"fieldType": "boolean",
"fieldCategory": "experimental"
},
{
"kind": "field",
"name": "for_outage_tolerance",
Expand Down
2 changes: 0 additions & 2 deletions cmd/mimir/help-all.txt.tmpl
Original file line number Diff line number Diff line change
Expand Up @@ -2623,8 +2623,6 @@ Usage of ./cmd/mimir/mimir:
Override the expected name on the server certificate.
-ruler.disabled-tenants comma-separated-list-of-strings
Comma separated list of tenants whose rules this ruler cannot evaluate. If specified, a ruler that would normally pick the specified tenant(s) for processing will ignore them instead. Subject to sharding.
-ruler.drain-notification-queue-on-shutdown
[experimental] Drain all outstanding alert notifications when shutting down. If false, any outstanding alert notifications are dropped when shutting down.
-ruler.enable-api
Enable the ruler config API. (default true)
-ruler.enabled-tenants comma-separated-list-of-strings
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1964,11 +1964,6 @@ alertmanager_client:
# CLI flag: -ruler.alertmanager-client.basic-auth-password
[basic_auth_password: <string> | default = ""]
# (experimental) Drain all outstanding alert notifications when shutting down.
# If false, any outstanding alert notifications are dropped when shutting down.
# CLI flag: -ruler.drain-notification-queue-on-shutdown
[drain_notification_queue_on_shutdown: <boolean> | default = false]
# (advanced) Max time to tolerate outage for restoring "for" state of alert.
# CLI flag: -ruler.for-outage-tolerance
[for_outage_tolerance: <duration> | default = 1h]
Expand Down
2 changes: 1 addition & 1 deletion pkg/ruler/manager.go
Original file line number Diff line number Diff line change
Expand Up @@ -299,7 +299,7 @@ func (r *DefaultMultiTenantManager) getOrCreateNotifier(userID string) (*notifie
var err error
if n, err = newRulerNotifier(&notifier.Options{
QueueCapacity: r.cfg.NotificationQueueCapacity,
DrainOnShutdown: r.cfg.DrainNotificationQueueOnShutdown,
DrainOnShutdown: true,
Registerer: reg,
Do: func(ctx context.Context, client *http.Client, req *http.Request) (*http.Response, error) {
// Note: The passed-in context comes from the Prometheus notifier
Expand Down
9 changes: 4 additions & 5 deletions pkg/ruler/manager_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -303,11 +303,10 @@ func TestDefaultMultiTenantManager_WaitsToDrainPendingNotificationsOnShutdown(t
user1Group1 := createRuleGroup("group-1", user, createRecordingRule("count:metric_1", "count(metric_1)"))

cfg := Config{
RulePath: t.TempDir(),
AlertmanagerURL: server.URL,
NotificationQueueCapacity: 1000,
NotificationTimeout: 10 * time.Second,
DrainNotificationQueueOnShutdown: true,
RulePath: t.TempDir(),
AlertmanagerURL: server.URL,
NotificationQueueCapacity: 1000,
NotificationTimeout: 10 * time.Second,
}
m, err := NewDefaultMultiTenantManager(cfg, managerMockFactory, nil, logger, nil)
require.NoError(t, err)
Expand Down
3 changes: 0 additions & 3 deletions pkg/ruler/ruler.go
Original file line number Diff line number Diff line change
Expand Up @@ -110,8 +110,6 @@ type Config struct {
NotificationTimeout time.Duration `yaml:"notification_timeout" category:"advanced"`
// Client configs for interacting with the Alertmanager
Notifier NotifierConfig `yaml:"alertmanager_client"`
// Enable draining the pending alert notification queue when shutting down.
DrainNotificationQueueOnShutdown bool `yaml:"drain_notification_queue_on_shutdown" category:"experimental"`

// Max time to tolerate outage for restoring "for" state of alert.
OutageTolerance time.Duration `yaml:"for_outage_tolerance" category:"advanced"`
Expand Down Expand Up @@ -182,7 +180,6 @@ func (cfg *Config) RegisterFlags(f *flag.FlagSet, logger log.Logger) {
f.DurationVar(&cfg.AlertmanagerRefreshInterval, "ruler.alertmanager-refresh-interval", 1*time.Minute, "How long to wait between refreshing DNS resolutions of Alertmanager hosts.")
f.IntVar(&cfg.NotificationQueueCapacity, "ruler.notification-queue-capacity", 10000, "Capacity of the queue for notifications to be sent to the Alertmanager.")
f.DurationVar(&cfg.NotificationTimeout, "ruler.notification-timeout", 10*time.Second, "HTTP timeout duration when sending notifications to the Alertmanager.")
f.BoolVar(&cfg.DrainNotificationQueueOnShutdown, "ruler.drain-notification-queue-on-shutdown", false, "Drain all outstanding alert notifications when shutting down. If false, any outstanding alert notifications are dropped when shutting down.")

f.StringVar(&cfg.RulePath, "ruler.rule-path", "./data-ruler/", "Directory to store temporary rule files loaded by the Prometheus rule managers. This directory is not required to be persisted between restarts.")
f.BoolVar(&cfg.EnableAPI, "ruler.enable-api", true, "Enable the ruler config API.")
Expand Down

0 comments on commit fdb1225

Please sign in to comment.