From ca7814ed9b730a31e1196e4db9eacf65a1444a36 Mon Sep 17 00:00:00 2001 From: Tito Lins Date: Fri, 27 Sep 2024 20:13:20 +0200 Subject: [PATCH] Alertmanager: Add grafana config size limit (#9402) * refactor SetUserGrafanaConfig unit tests * add config size limit/flag * implement config limit * add changelog * update docs * update help (make reference-help) * add PR number to changelog * update docs (make docs) * adjust help/docs wording * move CHANGELOG to appropriate section * fix changelog * fix changelog * one-liner errors.As Co-authored-by: Oleg Zaytsev * Update CHANGELOG.md Co-authored-by: Steve Simpson --------- Co-authored-by: Oleg Zaytsev Co-authored-by: Steve Simpson --- CHANGELOG.md | 1 + cmd/mimir/config-descriptor.json | 10 + cmd/mimir/help-all.txt.tmpl | 2 + cmd/mimir/help.txt.tmpl | 2 + .../configuration-parameters/index.md | 5 + pkg/alertmanager/api_grafana.go | 18 +- pkg/alertmanager/api_grafana_test.go | 175 +++++++++++------- pkg/alertmanager/multitenant.go | 3 + pkg/alertmanager/multitenant_test.go | 5 + pkg/util/validation/limits.go | 6 + 10 files changed, 164 insertions(+), 63 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index b54e0a6e0e2..cb519c57c93 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -25,6 +25,7 @@ * `-query-scheduler.grpc-client-config.grpc-compression=s2` * `-ruler.client.grpc-compression=s2` * `-ruler.query-frontend.grpc-client-config.grpc-compression=s2` +* [FEATURE] Alertmanager: limit added for maximum size of the Grafana configuration (`-alertmanager.max-grafana-config-size-bytes`). #9402 * [ENHANCEMENT] Ruler: Support `exclude_alerts` parameter in `/api/v1/rules` endpoint. #9300 * [ENHANCEMENT] Distributor: add a metric to track tenants who are sending newlines in their label values called `cortex_distributor_label_values_with_newlines_total`. 
#9400 diff --git a/cmd/mimir/config-descriptor.json b/cmd/mimir/config-descriptor.json index c00b8c38afe..458885e1bb5 100644 --- a/cmd/mimir/config-descriptor.json +++ b/cmd/mimir/config-descriptor.json @@ -4575,6 +4575,16 @@ "fieldFlag": "alertmanager.notification-rate-limit-per-integration", "fieldType": "map of string to float64" }, + { + "kind": "field", + "name": "alertmanager_max_grafana_config_size_bytes", + "required": false, + "desc": "Maximum size of the Grafana configuration file for Alertmanager that a tenant can upload via the Alertmanager API. 0 = no limit.", + "fieldValue": null, + "fieldDefaultValue": 0, + "fieldFlag": "alertmanager.max-grafana-config-size-bytes", + "fieldType": "int" + }, { "kind": "field", "name": "alertmanager_max_config_size_bytes", diff --git a/cmd/mimir/help-all.txt.tmpl b/cmd/mimir/help-all.txt.tmpl index caf67704529..02d2ee30a7b 100644 --- a/cmd/mimir/help-all.txt.tmpl +++ b/cmd/mimir/help-all.txt.tmpl @@ -203,6 +203,8 @@ Usage of ./cmd/mimir/mimir: Maximum size of configuration file for Alertmanager that tenant can upload via Alertmanager API. 0 = no limit. -alertmanager.max-dispatcher-aggregation-groups int Maximum number of aggregation groups in Alertmanager's dispatcher that a tenant can have. Each active aggregation group uses single goroutine. When the limit is reached, dispatcher will not dispatch alerts that belong to additional aggregation groups, but existing groups will keep working properly. 0 = no limit. + -alertmanager.max-grafana-config-size-bytes int + Maximum size of the Grafana configuration file for Alertmanager that a tenant can upload via the Alertmanager API. 0 = no limit. -alertmanager.max-recv-msg-size int Maximum size (bytes) of an accepted HTTP request body. 
(default 104857600) -alertmanager.max-silence-size-bytes int diff --git a/cmd/mimir/help.txt.tmpl b/cmd/mimir/help.txt.tmpl index f5cb59a299f..d337867bfbd 100644 --- a/cmd/mimir/help.txt.tmpl +++ b/cmd/mimir/help.txt.tmpl @@ -89,6 +89,8 @@ Usage of ./cmd/mimir/mimir: Maximum size of configuration file for Alertmanager that tenant can upload via Alertmanager API. 0 = no limit. -alertmanager.max-dispatcher-aggregation-groups int Maximum number of aggregation groups in Alertmanager's dispatcher that a tenant can have. Each active aggregation group uses single goroutine. When the limit is reached, dispatcher will not dispatch alerts that belong to additional aggregation groups, but existing groups will keep working properly. 0 = no limit. + -alertmanager.max-grafana-config-size-bytes int + Maximum size of the Grafana configuration file for Alertmanager that a tenant can upload via the Alertmanager API. 0 = no limit. -alertmanager.max-silence-size-bytes int Maximum silence size in bytes. 0 = no limit. -alertmanager.max-silences-count int diff --git a/docs/sources/mimir/configure/configuration-parameters/index.md b/docs/sources/mimir/configure/configuration-parameters/index.md index 3e56d39773a..6acdebc9c70 100644 --- a/docs/sources/mimir/configure/configuration-parameters/index.md +++ b/docs/sources/mimir/configure/configuration-parameters/index.md @@ -3644,6 +3644,11 @@ The `limits` block configures default and per-tenant limits imposed by component # CLI flag: -alertmanager.notification-rate-limit-per-integration [alertmanager_notification_rate_limit_per_integration: | default = {}] +# Maximum size of the Grafana configuration file for Alertmanager that a tenant +# can upload via the Alertmanager API. 0 = no limit. +# CLI flag: -alertmanager.max-grafana-config-size-bytes +[alertmanager_max_grafana_config_size_bytes: | default = 0] + # Maximum size of configuration file for Alertmanager that tenant can upload via # Alertmanager API. 0 = no limit. 
# CLI flag: -alertmanager.max-config-size-bytes diff --git a/pkg/alertmanager/api_grafana.go b/pkg/alertmanager/api_grafana.go index f728f63aa64..1814d67d47b 100644 --- a/pkg/alertmanager/api_grafana.go +++ b/pkg/alertmanager/api_grafana.go @@ -309,8 +309,24 @@ func (am *MultitenantAlertmanager) SetUserGrafanaConfig(w http.ResponseWriter, r return } - payload, err := io.ReadAll(r.Body) + var input io.Reader + maxConfigSize := am.limits.AlertmanagerMaxGrafanaConfigSize(userID) + if maxConfigSize > 0 { + input = http.MaxBytesReader(w, r.Body, int64(maxConfigSize)) + } else { + input = r.Body + } + + payload, err := io.ReadAll(input) if err != nil { + if maxBytesErr := (&http.MaxBytesError{}); errors.As(err, &maxBytesErr) { + msg := fmt.Sprintf(errConfigurationTooBig, maxConfigSize) + level.Warn(logger).Log("msg", msg) + w.WriteHeader(http.StatusBadRequest) + util.WriteJSONResponse(w, errorResult{Status: statusError, Error: msg}) + return + } + level.Error(logger).Log("msg", errReadingGrafanaConfig, "err", err.Error()) w.WriteHeader(http.StatusBadRequest) util.WriteJSONResponse(w, errorResult{Status: statusError, Error: fmt.Sprintf("%s: %s", errReadingGrafanaConfig, err.Error())}) diff --git a/pkg/alertmanager/api_grafana_test.go b/pkg/alertmanager/api_grafana_test.go index 308ae1d8a93..d9464377868 100644 --- a/pkg/alertmanager/api_grafana_test.go +++ b/pkg/alertmanager/api_grafana_test.go @@ -308,74 +308,125 @@ func TestMultitenantAlertmanager_SetUserGrafanaConfig(t *testing.T) { storage := objstore.NewInMemBucket() alertstore := bucketclient.NewBucketAlertStore(bucketclient.BucketAlertStoreConfig{}, storage, nil, log.NewNopLogger()) - am := &MultitenantAlertmanager{ - store: alertstore, - logger: test.NewTestingLogger(t), - } - - require.Len(t, storage.Objects(), 0) - req := httptest.NewRequest(http.MethodPost, "/api/v1/grafana/config", nil) - { - rec := httptest.NewRecorder() - am.SetUserGrafanaConfig(rec, req) - require.Equal(t, http.StatusUnauthorized, rec.Code) 
- require.Len(t, storage.Objects(), 0) - } - - ctx := user.InjectOrgID(context.Background(), "test_user") - req = req.WithContext(ctx) - { - // First, try with invalid configuration. - rec := httptest.NewRecorder() - json := ` + cases := []struct { + name string + maxConfigSize int + orgID string + body string + expStatusCode int + expResponseBody string + expStorageKey string + }{ { - "configuration_hash": "some_hash", - "created": 12312414343, - "default": false - } - ` - req.Body = io.NopCloser(strings.NewReader(json)) - am.SetUserGrafanaConfig(rec, req) - require.Equal(t, http.StatusBadRequest, rec.Code) - body, err := io.ReadAll(rec.Body) - require.NoError(t, err) - failedJSON := ` + name: "missing org id", + expStatusCode: http.StatusUnauthorized, + }, { - "error": "error marshalling JSON Grafana Alertmanager config: no route provided in config", - "status": "error" - } - ` - require.JSONEq(t, failedJSON, string(body)) - require.Equal(t, "application/json", rec.Header().Get("Content-Type")) - - // Now, with a valid configuration. 
- rec = httptest.NewRecorder() - json = fmt.Sprintf(` + name: "config size > max size", + body: fmt.Sprintf(` + { + "configuration": %s, + "configuration_hash": "ChEKBW5mbG9nEghzb21lZGF0YQ==", + "created": 12312414343, + "default": false, + "promoted": true, + "external_url": "http://test.grafana.com", + "static_headers": { + "Header-1": "Value-1", + "Header-2": "Value-2" + } + } + `, testGrafanaConfig), + orgID: "test_user", + maxConfigSize: 10, + expStatusCode: http.StatusBadRequest, + expResponseBody: ` + { + "error": "Alertmanager configuration is too big, limit: 10 bytes", + "status": "error" + } + `, + }, { - "configuration": %s, - "configuration_hash": "ChEKBW5mbG9nEghzb21lZGF0YQ==", - "created": 12312414343, - "default": false, - "promoted": true, - "external_url": "http://test.grafana.com", - "static_headers": { - "Header-1": "Value-1", - "Header-2": "Value-2" + name: "invalid config", + body: ` + { + "configuration_hash": "some_hash", + "created": 12312414343, + "default": false } - } - `, testGrafanaConfig) - req.Body = io.NopCloser(strings.NewReader(json)) - am.SetUserGrafanaConfig(rec, req) + `, + orgID: "test_user", + expStatusCode: http.StatusBadRequest, + expResponseBody: ` + { + "error": "error marshalling JSON Grafana Alertmanager config: no route provided in config", + "status": "error" + } + `, + }, + { + name: "with valid config", + body: fmt.Sprintf(` + { + "configuration": %s, + "configuration_hash": "ChEKBW5mbG9nEghzb21lZGF0YQ==", + "created": 12312414343, + "default": false, + "promoted": true, + "external_url": "http://test.grafana.com", + "static_headers": { + "Header-1": "Value-1", + "Header-2": "Value-2" + } + } + `, testGrafanaConfig), + orgID: "test_user", + expStatusCode: http.StatusCreated, + expResponseBody: successJSON, + expStorageKey: "grafana_alertmanager/test_user/grafana_config", + }, + } - require.Equal(t, http.StatusCreated, rec.Code) - body, err = io.ReadAll(rec.Body) - require.NoError(t, err) - require.JSONEq(t, 
successJSON, string(body)) - require.Equal(t, "application/json", rec.Header().Get("Content-Type")) + for _, tc := range cases { + t.Run(tc.name, func(t *testing.T) { + am := &MultitenantAlertmanager{ + store: alertstore, + logger: test.NewTestingLogger(t), + limits: &mockAlertManagerLimits{ + maxGrafanaConfigSize: tc.maxConfigSize, + }, + } + rec := httptest.NewRecorder() + ctx := context.Background() + if tc.orgID != "" { + ctx = user.InjectOrgID(ctx, "test_user") + } - require.Len(t, storage.Objects(), 1) - _, ok := storage.Objects()["grafana_alertmanager/test_user/grafana_config"] - require.True(t, ok) + req := httptest.NewRequest( + http.MethodPost, + "/api/v1/grafana/config", + io.NopCloser(strings.NewReader(tc.body)), + ).WithContext(ctx) + + am.SetUserGrafanaConfig(rec, req) + require.Equal(t, tc.expStatusCode, rec.Code) + + if tc.expResponseBody != "" { + body, err := io.ReadAll(rec.Body) + require.NoError(t, err) + + require.JSONEq(t, tc.expResponseBody, string(body)) + } + + if tc.expStorageKey == "" { + require.Len(t, storage.Objects(), 0) + } else { + require.Len(t, storage.Objects(), 1) + _, ok := storage.Objects()[tc.expStorageKey] + require.True(t, ok) + } + }) } } diff --git a/pkg/alertmanager/multitenant.go b/pkg/alertmanager/multitenant.go index 56b9c22bb7a..6cfa10f231f 100644 --- a/pkg/alertmanager/multitenant.go +++ b/pkg/alertmanager/multitenant.go @@ -234,6 +234,9 @@ type Limits interface { // when limit == rate.Inf. NotificationBurstSize(tenant string, integration string) int + // AlertmanagerMaxGrafanaConfigSize returns max size of the grafana configuration file that user is allowed to upload. If 0, there is no limit. + AlertmanagerMaxGrafanaConfigSize(tenant string) int + // AlertmanagerMaxConfigSize returns max size of configuration file that user is allowed to upload. If 0, there is no limit. 
AlertmanagerMaxConfigSize(tenant string) int diff --git a/pkg/alertmanager/multitenant_test.go b/pkg/alertmanager/multitenant_test.go index 75497ad2f96..a88e4d06d2d 100644 --- a/pkg/alertmanager/multitenant_test.go +++ b/pkg/alertmanager/multitenant_test.go @@ -3293,6 +3293,7 @@ type mockAlertManagerLimits struct { emailNotificationRateLimit rate.Limit emailNotificationBurst int maxConfigSize int + maxGrafanaConfigSize int maxSilencesCount int maxSilenceSizeBytes int maxTemplatesCount int @@ -3306,6 +3307,10 @@ func (m *mockAlertManagerLimits) AlertmanagerMaxConfigSize(string) int { return m.maxConfigSize } +func (m *mockAlertManagerLimits) AlertmanagerMaxGrafanaConfigSize(string) int { + return m.maxGrafanaConfigSize +} + func (m *mockAlertManagerLimits) AlertmanagerMaxSilencesCount(string) int { return m.maxSilencesCount } func (m *mockAlertManagerLimits) AlertmanagerMaxSilenceSizeBytes(string) int { diff --git a/pkg/util/validation/limits.go b/pkg/util/validation/limits.go index 463f4e81f62..221b09105ab 100644 --- a/pkg/util/validation/limits.go +++ b/pkg/util/validation/limits.go @@ -220,6 +220,7 @@ type Limits struct { NotificationRateLimit float64 `yaml:"alertmanager_notification_rate_limit" json:"alertmanager_notification_rate_limit"` NotificationRateLimitPerIntegration LimitsMap[float64] `yaml:"alertmanager_notification_rate_limit_per_integration" json:"alertmanager_notification_rate_limit_per_integration"` + AlertmanagerMaxGrafanaConfigSizeBytes int `yaml:"alertmanager_max_grafana_config_size_bytes" json:"alertmanager_max_grafana_config_size_bytes"` AlertmanagerMaxConfigSizeBytes int `yaml:"alertmanager_max_config_size_bytes" json:"alertmanager_max_config_size_bytes"` AlertmanagerMaxSilencesCount int `yaml:"alertmanager_max_silences_count" json:"alertmanager_max_silences_count"` AlertmanagerMaxSilenceSizeBytes int `yaml:"alertmanager_max_silence_size_bytes" json:"alertmanager_max_silence_size_bytes"` @@ -368,6 +369,7 @@ func (l *Limits) RegisterFlags(f 
*flag.FlagSet) { l.NotificationRateLimitPerIntegration = NotificationRateLimitMap() } f.Var(&l.NotificationRateLimitPerIntegration, "alertmanager.notification-rate-limit-per-integration", "Per-integration notification rate limits. Value is a map, where each key is integration name and value is a rate-limit (float). On command line, this map is given in JSON format. Rate limit has the same meaning as -alertmanager.notification-rate-limit, but only applies for specific integration. Allowed integration names: "+strings.Join(allowedIntegrationNames, ", ")+".") + f.IntVar(&l.AlertmanagerMaxGrafanaConfigSizeBytes, "alertmanager.max-grafana-config-size-bytes", 0, "Maximum size of the Grafana configuration file for Alertmanager that a tenant can upload via the Alertmanager API. 0 = no limit.") f.IntVar(&l.AlertmanagerMaxConfigSizeBytes, "alertmanager.max-config-size-bytes", 0, "Maximum size of configuration file for Alertmanager that tenant can upload via Alertmanager API. 0 = no limit.") f.IntVar(&l.AlertmanagerMaxSilencesCount, "alertmanager.max-silences-count", 0, "Maximum number of silences, including expired silences, that a tenant can have at once. 0 = no limit.") f.IntVar(&l.AlertmanagerMaxSilenceSizeBytes, "alertmanager.max-silence-size-bytes", 0, "Maximum silence size in bytes. 0 = no limit.") @@ -1003,6 +1005,10 @@ func (o *Overrides) NotificationBurstSize(user string, integration string) int { return int(l) } +func (o *Overrides) AlertmanagerMaxGrafanaConfigSize(userID string) int { + return o.getOverridesForUser(userID).AlertmanagerMaxGrafanaConfigSizeBytes +} + func (o *Overrides) AlertmanagerMaxConfigSize(userID string) int { return o.getOverridesForUser(userID).AlertmanagerMaxConfigSizeBytes }