Skip to content

Commit

Permalink
Alertmanager: Add grafana config size limit (#9402)
Browse files Browse the repository at this point in the history
* refactor SetUserGrafanaConfig unit tests

* add config size limit/flag

* implement config limit

* add changelog

* update docs

* update help (make reference-help)

* add PR number to changelog

* update docs (make docs)

* adjust help/docs wording

* move CHANGELOG to appropriate section

* fix changelog

* fix changelog

* one-liner errors.As

Co-authored-by: Oleg Zaytsev <[email protected]>

* Update CHANGELOG.md

Co-authored-by: Steve Simpson <[email protected]>

---------

Co-authored-by: Oleg Zaytsev <[email protected]>
Co-authored-by: Steve Simpson <[email protected]>
  • Loading branch information
3 people authored Sep 27, 2024
1 parent 6c31ff7 commit ca7814e
Show file tree
Hide file tree
Showing 10 changed files with 164 additions and 63 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
* `-query-scheduler.grpc-client-config.grpc-compression=s2`
* `-ruler.client.grpc-compression=s2`
* `-ruler.query-frontend.grpc-client-config.grpc-compression=s2`
* [FEATURE] Alertmanager: limit added for maximum size of the Grafana configuration (`-alertmanager.max-grafana-config-size-bytes`). #9402
* [ENHANCEMENT] Ruler: Support `exclude_alerts` parameter in `<prometheus-http-prefix>/api/v1/rules` endpoint. #9300
* [ENHANCEMENT] Distributor: add a metric to track tenants who are sending newlines in their label values called `cortex_distributor_label_values_with_newlines_total`. #9400

Expand Down
10 changes: 10 additions & 0 deletions cmd/mimir/config-descriptor.json
Original file line number Diff line number Diff line change
Expand Up @@ -4575,6 +4575,16 @@
"fieldFlag": "alertmanager.notification-rate-limit-per-integration",
"fieldType": "map of string to float64"
},
{
"kind": "field",
"name": "alertmanager_max_grafana_config_size_bytes",
"required": false,
"desc": "Maximum size of the Grafana configuration file for Alertmanager that a tenant can upload via the Alertmanager API. 0 = no limit.",
"fieldValue": null,
"fieldDefaultValue": 0,
"fieldFlag": "alertmanager.max-grafana-config-size-bytes",
"fieldType": "int"
},
{
"kind": "field",
"name": "alertmanager_max_config_size_bytes",
Expand Down
2 changes: 2 additions & 0 deletions cmd/mimir/help-all.txt.tmpl
Original file line number Diff line number Diff line change
Expand Up @@ -203,6 +203,8 @@ Usage of ./cmd/mimir/mimir:
Maximum size of configuration file for Alertmanager that tenant can upload via Alertmanager API. 0 = no limit.
-alertmanager.max-dispatcher-aggregation-groups int
Maximum number of aggregation groups in Alertmanager's dispatcher that a tenant can have. Each active aggregation group uses single goroutine. When the limit is reached, dispatcher will not dispatch alerts that belong to additional aggregation groups, but existing groups will keep working properly. 0 = no limit.
-alertmanager.max-grafana-config-size-bytes int
Maximum size of the Grafana configuration file for Alertmanager that a tenant can upload via the Alertmanager API. 0 = no limit.
-alertmanager.max-recv-msg-size int
Maximum size (bytes) of an accepted HTTP request body. (default 104857600)
-alertmanager.max-silence-size-bytes int
Expand Down
2 changes: 2 additions & 0 deletions cmd/mimir/help.txt.tmpl
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,8 @@ Usage of ./cmd/mimir/mimir:
Maximum size of configuration file for Alertmanager that tenant can upload via Alertmanager API. 0 = no limit.
-alertmanager.max-dispatcher-aggregation-groups int
Maximum number of aggregation groups in Alertmanager's dispatcher that a tenant can have. Each active aggregation group uses single goroutine. When the limit is reached, dispatcher will not dispatch alerts that belong to additional aggregation groups, but existing groups will keep working properly. 0 = no limit.
-alertmanager.max-grafana-config-size-bytes int
Maximum size of the Grafana configuration file for Alertmanager that a tenant can upload via the Alertmanager API. 0 = no limit.
-alertmanager.max-silence-size-bytes int
Maximum silence size in bytes. 0 = no limit.
-alertmanager.max-silences-count int
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3644,6 +3644,11 @@ The `limits` block configures default and per-tenant limits imposed by component
# CLI flag: -alertmanager.notification-rate-limit-per-integration
[alertmanager_notification_rate_limit_per_integration: <map of string to float64> | default = {}]
# Maximum size of the Grafana configuration file for Alertmanager that a tenant
# can upload via the Alertmanager API. 0 = no limit.
# CLI flag: -alertmanager.max-grafana-config-size-bytes
[alertmanager_max_grafana_config_size_bytes: <int> | default = 0]
# Maximum size of configuration file for Alertmanager that tenant can upload via
# Alertmanager API. 0 = no limit.
# CLI flag: -alertmanager.max-config-size-bytes
Expand Down
18 changes: 17 additions & 1 deletion pkg/alertmanager/api_grafana.go
Original file line number Diff line number Diff line change
Expand Up @@ -309,8 +309,24 @@ func (am *MultitenantAlertmanager) SetUserGrafanaConfig(w http.ResponseWriter, r
return
}

payload, err := io.ReadAll(r.Body)
var input io.Reader
maxConfigSize := am.limits.AlertmanagerMaxGrafanaConfigSize(userID)
if maxConfigSize > 0 {
input = http.MaxBytesReader(w, r.Body, int64(maxConfigSize))
} else {
input = r.Body
}

payload, err := io.ReadAll(input)
if err != nil {
if maxBytesErr := (&http.MaxBytesError{}); errors.As(err, &maxBytesErr) {
msg := fmt.Sprintf(errConfigurationTooBig, maxConfigSize)
level.Warn(logger).Log("msg", msg)
w.WriteHeader(http.StatusBadRequest)
util.WriteJSONResponse(w, errorResult{Status: statusError, Error: msg})
return
}

level.Error(logger).Log("msg", errReadingGrafanaConfig, "err", err.Error())
w.WriteHeader(http.StatusBadRequest)
util.WriteJSONResponse(w, errorResult{Status: statusError, Error: fmt.Sprintf("%s: %s", errReadingGrafanaConfig, err.Error())})
Expand Down
175 changes: 113 additions & 62 deletions pkg/alertmanager/api_grafana_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -308,74 +308,125 @@ func TestMultitenantAlertmanager_SetUserGrafanaConfig(t *testing.T) {
storage := objstore.NewInMemBucket()
alertstore := bucketclient.NewBucketAlertStore(bucketclient.BucketAlertStoreConfig{}, storage, nil, log.NewNopLogger())

am := &MultitenantAlertmanager{
store: alertstore,
logger: test.NewTestingLogger(t),
}

require.Len(t, storage.Objects(), 0)
req := httptest.NewRequest(http.MethodPost, "/api/v1/grafana/config", nil)
{
rec := httptest.NewRecorder()
am.SetUserGrafanaConfig(rec, req)
require.Equal(t, http.StatusUnauthorized, rec.Code)
require.Len(t, storage.Objects(), 0)
}

ctx := user.InjectOrgID(context.Background(), "test_user")
req = req.WithContext(ctx)
{
// First, try with invalid configuration.
rec := httptest.NewRecorder()
json := `
cases := []struct {
name string
maxConfigSize int
orgID string
body string
expStatusCode int
expResponseBody string
expStorageKey string
}{
{
"configuration_hash": "some_hash",
"created": 12312414343,
"default": false
}
`
req.Body = io.NopCloser(strings.NewReader(json))
am.SetUserGrafanaConfig(rec, req)
require.Equal(t, http.StatusBadRequest, rec.Code)
body, err := io.ReadAll(rec.Body)
require.NoError(t, err)
failedJSON := `
name: "missing org id",
expStatusCode: http.StatusUnauthorized,
},
{
"error": "error marshalling JSON Grafana Alertmanager config: no route provided in config",
"status": "error"
}
`
require.JSONEq(t, failedJSON, string(body))
require.Equal(t, "application/json", rec.Header().Get("Content-Type"))

// Now, with a valid configuration.
rec = httptest.NewRecorder()
json = fmt.Sprintf(`
name: "config size > max size",
body: fmt.Sprintf(`
{
"configuration": %s,
"configuration_hash": "ChEKBW5mbG9nEghzb21lZGF0YQ==",
"created": 12312414343,
"default": false,
"promoted": true,
"external_url": "http://test.grafana.com",
"static_headers": {
"Header-1": "Value-1",
"Header-2": "Value-2"
}
}
`, testGrafanaConfig),
orgID: "test_user",
maxConfigSize: 10,
expStatusCode: http.StatusBadRequest,
expResponseBody: `
{
"error": "Alertmanager configuration is too big, limit: 10 bytes",
"status": "error"
}
`,
},
{
"configuration": %s,
"configuration_hash": "ChEKBW5mbG9nEghzb21lZGF0YQ==",
"created": 12312414343,
"default": false,
"promoted": true,
"external_url": "http://test.grafana.com",
"static_headers": {
"Header-1": "Value-1",
"Header-2": "Value-2"
name: "invalid config",
body: `
{
"configuration_hash": "some_hash",
"created": 12312414343,
"default": false
}
}
`, testGrafanaConfig)
req.Body = io.NopCloser(strings.NewReader(json))
am.SetUserGrafanaConfig(rec, req)
`,
orgID: "test_user",
expStatusCode: http.StatusBadRequest,
expResponseBody: `
{
"error": "error marshalling JSON Grafana Alertmanager config: no route provided in config",
"status": "error"
}
`,
},
{
name: "with valid config",
body: fmt.Sprintf(`
{
"configuration": %s,
"configuration_hash": "ChEKBW5mbG9nEghzb21lZGF0YQ==",
"created": 12312414343,
"default": false,
"promoted": true,
"external_url": "http://test.grafana.com",
"static_headers": {
"Header-1": "Value-1",
"Header-2": "Value-2"
}
}
`, testGrafanaConfig),
orgID: "test_user",
expStatusCode: http.StatusCreated,
expResponseBody: successJSON,
expStorageKey: "grafana_alertmanager/test_user/grafana_config",
},
}

require.Equal(t, http.StatusCreated, rec.Code)
body, err = io.ReadAll(rec.Body)
require.NoError(t, err)
require.JSONEq(t, successJSON, string(body))
require.Equal(t, "application/json", rec.Header().Get("Content-Type"))
for _, tc := range cases {
t.Run(tc.name, func(t *testing.T) {
am := &MultitenantAlertmanager{
store: alertstore,
logger: test.NewTestingLogger(t),
limits: &mockAlertManagerLimits{
maxGrafanaConfigSize: tc.maxConfigSize,
},
}
rec := httptest.NewRecorder()
ctx := context.Background()
if tc.orgID != "" {
ctx = user.InjectOrgID(ctx, "test_user")
}

require.Len(t, storage.Objects(), 1)
_, ok := storage.Objects()["grafana_alertmanager/test_user/grafana_config"]
require.True(t, ok)
req := httptest.NewRequest(
http.MethodPost,
"/api/v1/grafana/config",
io.NopCloser(strings.NewReader(tc.body)),
).WithContext(ctx)

am.SetUserGrafanaConfig(rec, req)
require.Equal(t, tc.expStatusCode, rec.Code)

if tc.expResponseBody != "" {
body, err := io.ReadAll(rec.Body)
require.NoError(t, err)

require.JSONEq(t, tc.expResponseBody, string(body))
}

if tc.expStorageKey == "" {
require.Len(t, storage.Objects(), 0)
} else {
require.Len(t, storage.Objects(), 1)
_, ok := storage.Objects()[tc.expStorageKey]
require.True(t, ok)
}
})
}
}

Expand Down
3 changes: 3 additions & 0 deletions pkg/alertmanager/multitenant.go
Original file line number Diff line number Diff line change
Expand Up @@ -234,6 +234,9 @@ type Limits interface {
// when limit == rate.Inf.
NotificationBurstSize(tenant string, integration string) int

// AlertmanagerMaxGrafanaConfigSize returns the maximum size, in bytes, of the Grafana configuration file that a tenant is allowed to upload. If 0, there is no limit.
AlertmanagerMaxGrafanaConfigSize(tenant string) int

// AlertmanagerMaxConfigSize returns max size of configuration file that user is allowed to upload. If 0, there is no limit.
AlertmanagerMaxConfigSize(tenant string) int

Expand Down
5 changes: 5 additions & 0 deletions pkg/alertmanager/multitenant_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -3293,6 +3293,7 @@ type mockAlertManagerLimits struct {
emailNotificationRateLimit rate.Limit
emailNotificationBurst int
maxConfigSize int
maxGrafanaConfigSize int
maxSilencesCount int
maxSilenceSizeBytes int
maxTemplatesCount int
Expand All @@ -3306,6 +3307,10 @@ func (m *mockAlertManagerLimits) AlertmanagerMaxConfigSize(string) int {
return m.maxConfigSize
}

// AlertmanagerMaxGrafanaConfigSize returns the Grafana config size limit
// stored on the mock. The tenant argument is ignored.
func (m *mockAlertManagerLimits) AlertmanagerMaxGrafanaConfigSize(_ string) int {
	limit := m.maxGrafanaConfigSize
	return limit
}

// AlertmanagerMaxSilencesCount returns the silences count limit stored on the
// mock. The tenant argument is ignored.
func (m *mockAlertManagerLimits) AlertmanagerMaxSilencesCount(_ string) int {
	return m.maxSilencesCount
}

func (m *mockAlertManagerLimits) AlertmanagerMaxSilenceSizeBytes(string) int {
Expand Down
6 changes: 6 additions & 0 deletions pkg/util/validation/limits.go
Original file line number Diff line number Diff line change
Expand Up @@ -220,6 +220,7 @@ type Limits struct {
NotificationRateLimit float64 `yaml:"alertmanager_notification_rate_limit" json:"alertmanager_notification_rate_limit"`
NotificationRateLimitPerIntegration LimitsMap[float64] `yaml:"alertmanager_notification_rate_limit_per_integration" json:"alertmanager_notification_rate_limit_per_integration"`

AlertmanagerMaxGrafanaConfigSizeBytes int `yaml:"alertmanager_max_grafana_config_size_bytes" json:"alertmanager_max_grafana_config_size_bytes"`
AlertmanagerMaxConfigSizeBytes int `yaml:"alertmanager_max_config_size_bytes" json:"alertmanager_max_config_size_bytes"`
AlertmanagerMaxSilencesCount int `yaml:"alertmanager_max_silences_count" json:"alertmanager_max_silences_count"`
AlertmanagerMaxSilenceSizeBytes int `yaml:"alertmanager_max_silence_size_bytes" json:"alertmanager_max_silence_size_bytes"`
Expand Down Expand Up @@ -368,6 +369,7 @@ func (l *Limits) RegisterFlags(f *flag.FlagSet) {
l.NotificationRateLimitPerIntegration = NotificationRateLimitMap()
}
f.Var(&l.NotificationRateLimitPerIntegration, "alertmanager.notification-rate-limit-per-integration", "Per-integration notification rate limits. Value is a map, where each key is integration name and value is a rate-limit (float). On command line, this map is given in JSON format. Rate limit has the same meaning as -alertmanager.notification-rate-limit, but only applies for specific integration. Allowed integration names: "+strings.Join(allowedIntegrationNames, ", ")+".")
f.IntVar(&l.AlertmanagerMaxGrafanaConfigSizeBytes, "alertmanager.max-grafana-config-size-bytes", 0, "Maximum size of the Grafana configuration file for Alertmanager that a tenant can upload via the Alertmanager API. 0 = no limit.")
f.IntVar(&l.AlertmanagerMaxConfigSizeBytes, "alertmanager.max-config-size-bytes", 0, "Maximum size of configuration file for Alertmanager that tenant can upload via Alertmanager API. 0 = no limit.")
f.IntVar(&l.AlertmanagerMaxSilencesCount, "alertmanager.max-silences-count", 0, "Maximum number of silences, including expired silences, that a tenant can have at once. 0 = no limit.")
f.IntVar(&l.AlertmanagerMaxSilenceSizeBytes, "alertmanager.max-silence-size-bytes", 0, "Maximum silence size in bytes. 0 = no limit.")
Expand Down Expand Up @@ -1003,6 +1005,10 @@ func (o *Overrides) NotificationBurstSize(user string, integration string) int {
return int(l)
}

// AlertmanagerMaxGrafanaConfigSize returns the maximum size, in bytes, of the
// Grafana Alertmanager configuration that the given tenant may upload.
// A value of 0 means there is no limit.
func (o *Overrides) AlertmanagerMaxGrafanaConfigSize(userID string) int {
	tenantLimits := o.getOverridesForUser(userID)
	return tenantLimits.AlertmanagerMaxGrafanaConfigSizeBytes
}

// AlertmanagerMaxConfigSize returns the maximum size, in bytes, of the
// Alertmanager configuration that the given tenant may upload.
// A value of 0 means there is no limit.
func (o *Overrides) AlertmanagerMaxConfigSize(userID string) int {
	tenantLimits := o.getOverridesForUser(userID)
	return tenantLimits.AlertmanagerMaxConfigSizeBytes
}
Expand Down

0 comments on commit ca7814e

Please sign in to comment.