Include value in metrics for unspecified value (#389)

Signed-off-by: Jesper Söderlund <[email protected]>
envoyproxy · Jan 17, 2023 · f28024e · f28024e
1 parent 5f3f5a4
commit f28024e
Show file tree

Hide file tree

Showing 13 changed files with 226 additions and 72 deletions.
diff --git a/README.md b/README.md
@@ -16,6 +16,7 @@
     - [Rate limit definition](#rate-limit-definition)
     - [Replaces](#replaces)
     - [ShadowMode](#shadowmode)
+    - [Including detailed metrics for unspecified values](#including-detailed-metrics-for-unspecified-values)
     - [Examples](#examples)
       - [Example 1](#example-1)
       - [Example 2](#example-2)
@@ -24,6 +25,7 @@
       - [Example 5](#example-5)
       - [Example 6](#example-6)
       - [Example 7](#example-7)
+      - [Example 8](#example-8)
   - [Loading Configuration](#loading-configuration)
   - [Log Format](#log-format)
   - [GRPC Keepalive](#grpc-keepalive)
@@ -205,6 +207,7 @@ descriptors:
       unit: <see below: required>
       requests_per_unit: <see below: required>
     shadow_mode: (optional)
+    detailed_metric: (optional)
     descriptors: (optional block)
       - ... (nested repetition of above)
 ```
@@ -253,6 +256,12 @@ An additional statistic is added to keep track of how many times a key with "sha
 
 There is also a Global Shadow Mode
 
+### Including detailed metrics for unspecified values
+
+Setting the `detailed_metric: true` for a descriptor will extend the metrics that are produced. Normally a desriptor that matches a value that is not explicitly listed in the configuration will from a metrics point-of-view be rolled-up into the base entry. This can be probelmatic if you want to have those details available for analysis.
+
+NB! This should only be enabled in situations where the potentially large cardinality of metrics that this can lead to is acceptable.
+
 ### Examples
 
 #### Example 1
@@ -490,6 +499,43 @@ descriptors:
           unit: second
 ```
 
+#### Example 8
+
+In this example we demonstrate how a descriptor without a specified value is configured to override the default behavior and include the matched-value in the metrics.
+
+Rate limting configuration and tracking works as normally
+
+```
+(key_1, unspecified_value): 10 / sec
+(key_1, unspecified_value2): 10 / sec
+(key_1, value_1): 20 / sec
+```
+
+```yaml
+domain: example8
+descriptors:
+  - key: key1
+    detailed_metric: true
+    rate_limit:
+      unit: minute
+      requests_per_unit: 10
+  - key: key1
+    value: value1
+    rate_limit:
+      unit: minute
+      requests_per_unit: 20
+```
+
+The metrics keys will be the following:
+
+"key1_unspecified_value"
+"key1_unspecified_value2"
+"key1_value1"
+
+rather than the normal
+"key1"
+"key1_value1"
+
 ## Loading Configuration
 
 The Ratelimit service uses a library written by Lyft called [goruntime](https://github.com/lyft/goruntime) to do configuration loading. Goruntime monitors
@@ -623,6 +669,8 @@ KEY_VALUE:
 - A combination of the key value
 - Nested descriptors would be suffixed in the stats path
 
+The default mode is that the value-part is omitted if the rule that matches is a descriptor without a value. Specifying the `detailed_metric` configuration parameter changes this behavior and creates a unique metric even in this situation.
+
 STAT:
 
 - near_limit: Number of rule hits over the NearLimit ratio threshold (currently 80%) but under the threshold rate.

diff --git a/examples/envoy/proxy.yaml b/examples/envoy/proxy.yaml
@@ -149,3 +149,12 @@ static_resources:
                                   - request_headers:
                                       header_name: "category"
                                       descriptor_key: "category"
+                        - match:
+                            prefix: /unspec
+                          route:
+                            cluster: mock
+                            rate_limits:
+                              - actions:
+                                  - request_headers:
+                                      header_name: "unspec"
+                                      descriptor_key: "unspec"
diff --git a/examples/ratelimit/config/example.yaml b/examples/ratelimit/config/example.yaml
@@ -76,6 +76,11 @@ descriptors:
       - key: bay
         rate_limit:
           unlimited: true
+  - key: unspec
+    detailed_metric: true
+    rate_limit:
+      unit: minute
+      requests_per_unit: 2
   - key: qux
     rate_limit:
       unlimited: true
diff --git a/integration-test/scripts/value-included-in-stats-key-when-unspecified.sh b/integration-test/scripts/value-included-in-stats-key-when-unspecified.sh
@@ -0,0 +1,38 @@
+#!/bin/bash
+
+#
+# descriptor: (unspec: *)
+# Has rate limit quota 2 req / min
+# detailed_metric is true
+#
+
+response=$(curl -f -s -H "unspec: unspecified_value" http://envoy-proxy:8888/unspec)
+response=$(curl -f -s -H "unspec: unspecified_value" http://envoy-proxy:8888/unspec)
+
+# This should be successful
+if [ $? -ne 0 ]; then
+	echo "These should not be rate limited"
+	exit 1
+fi
+
+# This one should be ratelimited
+response=$(curl -f -s -H "unspec: unspecified_value" http://envoy-proxy:8888/unspec)
+
+if [ $? -eq 0 ]; then
+	echo "This should be a ratelimited call"
+	exit 1
+fi
+
+# Sleep a bit to allow the stats to be propagated
+sleep 2
+
+# Extract the metric for the unspecified value, which shoulb be there due to the "detailed_metric"
+stats=$(curl -f -s statsd:9102/metrics | grep -e ratelimit_service_rate_limit_over_limit | grep unspec_unspecified_value | cut -d} -f2 | sed 's/ //g')
+
+echo "Length: ${#stats}"
+echo "${stats}"
+
+if [ "${stats}" != "1" ]; then
+	echo "Overlimit should be 1"
+	exit 1
+fi
diff --git a/src/config/config.go b/src/config/config.go
@@ -17,13 +17,14 @@ func (e RateLimitConfigError) Error() string {
 
 // Wrapper for an individual rate limit config entry which includes the defined limit and stats.
 type RateLimit struct {
-	FullKey    string
-	Stats      stats.RateLimitStats
-	Limit      *pb.RateLimitResponse_RateLimit
-	Unlimited  bool
-	ShadowMode bool
-	Name       string
-	Replaces   []string
+	FullKey                              string
+	Stats                                stats.RateLimitStats
+	Limit                                *pb.RateLimitResponse_RateLimit
+	Unlimited                            bool
+	ShadowMode                           bool
+	Name                                 string
+	Replaces                             []string
+	IncludeValueInMetricWhenNotSpecified bool
 }
 
 // Interface for interacting with a loaded rate limit config.

diff --git a/src/config/config_impl.go b/src/config/config_impl.go
@@ -26,11 +26,12 @@ type YamlRateLimit struct {
 }
 
 type YamlDescriptor struct {
-	Key         string
-	Value       string
-	RateLimit   *YamlRateLimit `yaml:"rate_limit"`
-	Descriptors []YamlDescriptor
-	ShadowMode  bool `yaml:"shadow_mode"`
+	Key                               string
+	Value                             string
+	RateLimit                         *YamlRateLimit `yaml:"rate_limit"`
+	Descriptors                       []YamlDescriptor
+	ShadowMode                        bool `yaml:"shadow_mode"`
+	IncludeMetricsForUnspecifiedValue bool `yaml:"detailed_metric"`
 }
 
 type YamlRoot struct {
@@ -65,6 +66,7 @@ var validKeys = map[string]bool{
 	"shadow_mode":       true,
 	"name":              true,
 	"replaces":          true,
+	"detailed_metric":   true,
 }
 
 // Create a new rate limit config entry.
@@ -74,7 +76,7 @@ var validKeys = map[string]bool{
 // @param unlimited supplies whether the rate limit is unlimited
 // @return the new config entry.
 func NewRateLimit(requestsPerUnit uint32, unit pb.RateLimitResponse_RateLimit_Unit, rlStats stats.RateLimitStats,
-	unlimited bool, shadowMode bool, name string, replaces []string) *RateLimit {
+	unlimited bool, shadowMode bool, name string, replaces []string, includeValueInMetricWhenNotSpecified bool) *RateLimit {
 
 	return &RateLimit{
 		FullKey: rlStats.GetKey(),
@@ -83,10 +85,11 @@ func NewRateLimit(requestsPerUnit uint32, unit pb.RateLimitResponse_RateLimit_Un
 			RequestsPerUnit: requestsPerUnit,
 			Unit:            unit,
 		},
-		Unlimited:  unlimited,
-		ShadowMode: shadowMode,
-		Name:       name,
-		Replaces:   replaces,
+		Unlimited:                            unlimited,
+		ShadowMode:                           shadowMode,
+		Name:                                 name,
+		Replaces:                             replaces,
+		IncludeValueInMetricWhenNotSpecified: includeValueInMetricWhenNotSpecified,
 	}
 }
 
@@ -163,7 +166,7 @@ func (this *rateLimitDescriptor) loadDescriptors(config RateLimitConfigToLoad, p
 			rateLimit = NewRateLimit(
 				descriptorConfig.RateLimit.RequestsPerUnit, pb.RateLimitResponse_RateLimit_Unit(value),
 				statsManager.NewStats(newParentKey), unlimited, descriptorConfig.ShadowMode,
-				descriptorConfig.RateLimit.Name, replaces,
+				descriptorConfig.RateLimit.Name, replaces, descriptorConfig.IncludeMetricsForUnspecifiedValue,
 			)
 			rateLimitDebugString = fmt.Sprintf(
 				" ratelimit={requests_per_unit=%d, unit=%s, unlimited=%t, shadow_mode=%t}", rateLimit.Limit.RequestsPerUnit,
@@ -306,6 +309,7 @@ func (this *rateLimitConfigImpl) GetLimit(
 			false,
 			"",
 			[]string{},
+			false,
 		)
 		return rateLimit
 	}
@@ -325,6 +329,7 @@ func (this *rateLimitConfigImpl) GetLimit(
 
 		if nextDescriptor != nil && nextDescriptor.limit != nil {
 			logger.Debugf("found rate limit: %s", finalKey)
+
 			if i == len(descriptor.Entries)-1 {
 				rateLimit = nextDescriptor.limit
 			} else {
@@ -336,6 +341,10 @@ func (this *rateLimitConfigImpl) GetLimit(
 			logger.Debugf("iterating to next level")
 			descriptorsMap = nextDescriptor.descriptors
 		} else {
+			if rateLimit != nil && rateLimit.IncludeValueInMetricWhenNotSpecified {
+				rateLimit = NewRateLimit(rateLimit.Limit.RequestsPerUnit, rateLimit.Limit.Unit, this.statsManager.NewStats(rateLimit.FullKey+"_"+entry.Value), rateLimit.Unlimited, rateLimit.ShadowMode, rateLimit.Name, rateLimit.Replaces, false)
+			}
+
 			break
 		}
 	}

diff --git a/test/config/basic_config.yaml b/test/config/basic_config.yaml
@@ -60,3 +60,11 @@ descriptors:
   - key: key6
     rate_limit:
       unlimited: true
+
+  # Top level key only with default rate limit.
+
+  - key: key7
+    detailed_metric: true
+    rate_limit:
+      unit: minute
+      requests_per_unit: 70
diff --git a/test/config/config_test.go b/test/config/config_test.go
@@ -177,6 +177,42 @@ func TestBasicConfig(t *testing.T) {
 	assert.True(rl.Unlimited)
 	assert.EqualValues(1, stats.NewCounter("test-domain.key6.total_hits").Value())
 	assert.EqualValues(1, stats.NewCounter("test-domain.key6.within_limit").Value())
+
+	// A value for the key with detailed_metric: true
+	// should also generate a stat with the value included
+	rl = rlConfig.GetLimit(
+		nil, "test-domain",
+		&pb_struct.RateLimitDescriptor{
+			Entries: []*pb_struct.RateLimitDescriptor_Entry{{Key: "key7", Value: "unspecified_value"}},
+		})
+	rl.Stats.TotalHits.Inc()
+	rl.Stats.OverLimit.Inc()
+	rl.Stats.NearLimit.Inc()
+	rl.Stats.WithinLimit.Inc()
+	assert.EqualValues(70, rl.Limit.RequestsPerUnit)
+	assert.Equal(pb.RateLimitResponse_RateLimit_MINUTE, rl.Limit.Unit)
+	assert.EqualValues(1, stats.NewCounter("test-domain.key7_unspecified_value.total_hits").Value())
+	assert.EqualValues(1, stats.NewCounter("test-domain.key7_unspecified_value.over_limit").Value())
+	assert.EqualValues(1, stats.NewCounter("test-domain.key7_unspecified_value.near_limit").Value())
+	assert.EqualValues(1, stats.NewCounter("test-domain.key7_unspecified_value.within_limit").Value())
+
+	// Another value for the key with detailed_metric: true
+	// should also generate a stat with the value included
+	rl = rlConfig.GetLimit(
+		nil, "test-domain",
+		&pb_struct.RateLimitDescriptor{
+			Entries: []*pb_struct.RateLimitDescriptor_Entry{{Key: "key7", Value: "another_value"}},
+		})
+	rl.Stats.TotalHits.Inc()
+	rl.Stats.OverLimit.Inc()
+	rl.Stats.NearLimit.Inc()
+	rl.Stats.WithinLimit.Inc()
+	assert.EqualValues(70, rl.Limit.RequestsPerUnit)
+	assert.Equal(pb.RateLimitResponse_RateLimit_MINUTE, rl.Limit.Unit)
+	assert.EqualValues(1, stats.NewCounter("test-domain.key7_another_value.total_hits").Value())
+	assert.EqualValues(1, stats.NewCounter("test-domain.key7_another_value.over_limit").Value())
+	assert.EqualValues(1, stats.NewCounter("test-domain.key7_another_value.near_limit").Value())
+	assert.EqualValues(1, stats.NewCounter("test-domain.key7_another_value.within_limit").Value())
 }
 
 func TestDomainMerge(t *testing.T) {

diff --git a/test/limiter/base_limiter_test.go b/test/limiter/base_limiter_test.go
@@ -29,7 +29,7 @@ func TestGenerateCacheKeys(t *testing.T) {
 	timeSource.EXPECT().UnixNow().Return(int64(1234))
 	baseRateLimit := limiter.NewBaseRateLimit(timeSource, rand.New(jitterSource), 3600, nil, 0.8, "", sm)
 	request := common.NewRateLimitRequest("domain", [][][2]string{{{"key", "value"}}}, 1)
-	limits := []*config.RateLimit{config.NewRateLimit(10, pb.RateLimitResponse_RateLimit_SECOND, sm.NewStats("key_value"), false, false, "", nil)}
+	limits := []*config.RateLimit{config.NewRateLimit(10, pb.RateLimitResponse_RateLimit_SECOND, sm.NewStats("key_value"), false, false, "", nil, false)}
 	assert.Equal(uint64(0), limits[0].Stats.TotalHits.Value())
 	cacheKeys := baseRateLimit.GenerateCacheKeys(request, limits, 1)
 	assert.Equal(1, len(cacheKeys))
@@ -48,7 +48,7 @@ func TestGenerateCacheKeysPrefix(t *testing.T) {
 	timeSource.EXPECT().UnixNow().Return(int64(1234))
 	baseRateLimit := limiter.NewBaseRateLimit(timeSource, rand.New(jitterSource), 3600, nil, 0.8, "prefix:", sm)
 	request := common.NewRateLimitRequest("domain", [][][2]string{{{"key", "value"}}}, 1)
-	limits := []*config.RateLimit{config.NewRateLimit(10, pb.RateLimitResponse_RateLimit_SECOND, sm.NewStats("key_value"), false, false, "", nil)}
+	limits := []*config.RateLimit{config.NewRateLimit(10, pb.RateLimitResponse_RateLimit_SECOND, sm.NewStats("key_value"), false, false, "", nil, false)}
 	assert.Equal(uint64(0), limits[0].Stats.TotalHits.Value())
 	cacheKeys := baseRateLimit.GenerateCacheKeys(request, limits, 1)
 	assert.Equal(1, len(cacheKeys))
@@ -102,7 +102,7 @@ func TestGetResponseStatusOverLimitWithLocalCache(t *testing.T) {
 	statsStore := stats.NewStore(stats.NewNullSink(), false)
 	sm := mockstats.NewMockStatManager(statsStore)
 	baseRateLimit := limiter.NewBaseRateLimit(timeSource, nil, 3600, nil, 0.8, "", sm)
-	limits := []*config.RateLimit{config.NewRateLimit(5, pb.RateLimitResponse_RateLimit_SECOND, sm.NewStats("key_value"), false, false, "", nil)}
+	limits := []*config.RateLimit{config.NewRateLimit(5, pb.RateLimitResponse_RateLimit_SECOND, sm.NewStats("key_value"), false, false, "", nil, false)}
 	limitInfo := limiter.NewRateLimitInfo(limits[0], 2, 6, 4, 5)
 	// As `isOverLimitWithLocalCache` is passed as `true`, immediate response is returned with no checks of the limits.
 	responseStatus := baseRateLimit.GetResponseDescriptorStatus("key", limitInfo, true, 2)
@@ -125,7 +125,7 @@ func TestGetResponseStatusOverLimitWithLocalCacheShadowMode(t *testing.T) {
 	sm := mockstats.NewMockStatManager(statsStore)
 	baseRateLimit := limiter.NewBaseRateLimit(timeSource, nil, 3600, nil, 0.8, "", sm)
 	// This limit is in ShadowMode
-	limits := []*config.RateLimit{config.NewRateLimit(5, pb.RateLimitResponse_RateLimit_SECOND, sm.NewStats("key_value"), false, true, "", nil)}
+	limits := []*config.RateLimit{config.NewRateLimit(5, pb.RateLimitResponse_RateLimit_SECOND, sm.NewStats("key_value"), false, true, "", nil, false)}
 	limitInfo := limiter.NewRateLimitInfo(limits[0], 2, 6, 4, 5)
 	// As `isOverLimitWithLocalCache` is passed as `true`, immediate response is returned with no checks of the limits.
 	responseStatus := baseRateLimit.GetResponseDescriptorStatus("key", limitInfo, true, 2)
@@ -149,7 +149,7 @@ func TestGetResponseStatusOverLimit(t *testing.T) {
 	localCache := freecache.NewCache(100)
 	sm := mockstats.NewMockStatManager(statsStore)
 	baseRateLimit := limiter.NewBaseRateLimit(timeSource, nil, 3600, localCache, 0.8, "", sm)
-	limits := []*config.RateLimit{config.NewRateLimit(5, pb.RateLimitResponse_RateLimit_SECOND, sm.NewStats("key_value"), false, false, "", nil)}
+	limits := []*config.RateLimit{config.NewRateLimit(5, pb.RateLimitResponse_RateLimit_SECOND, sm.NewStats("key_value"), false, false, "", nil, false)}
 	limitInfo := limiter.NewRateLimitInfo(limits[0], 2, 7, 4, 5)
 	responseStatus := baseRateLimit.GetResponseDescriptorStatus("key", limitInfo, false, 1)
 	assert.Equal(pb.RateLimitResponse_OVER_LIMIT, responseStatus.GetCode())
@@ -175,7 +175,7 @@ func TestGetResponseStatusOverLimitShadowMode(t *testing.T) {
 	sm := mockstats.NewMockStatManager(statsStore)
 	baseRateLimit := limiter.NewBaseRateLimit(timeSource, nil, 3600, localCache, 0.8, "", sm)
 	// Key is in shadow_mode: true
-	limits := []*config.RateLimit{config.NewRateLimit(5, pb.RateLimitResponse_RateLimit_SECOND, sm.NewStats("key_value"), false, true, "", nil)}
+	limits := []*config.RateLimit{config.NewRateLimit(5, pb.RateLimitResponse_RateLimit_SECOND, sm.NewStats("key_value"), false, true, "", nil, false)}
 	limitInfo := limiter.NewRateLimitInfo(limits[0], 2, 7, 4, 5)
 	responseStatus := baseRateLimit.GetResponseDescriptorStatus("key", limitInfo, false, 1)
 	assert.Equal(pb.RateLimitResponse_OK, responseStatus.GetCode())
@@ -197,7 +197,7 @@ func TestGetResponseStatusBelowLimit(t *testing.T) {
 	statsStore := stats.NewStore(stats.NewNullSink(), false)
 	sm := mockstats.NewMockStatManager(statsStore)
 	baseRateLimit := limiter.NewBaseRateLimit(timeSource, nil, 3600, nil, 0.8, "", sm)
-	limits := []*config.RateLimit{config.NewRateLimit(10, pb.RateLimitResponse_RateLimit_SECOND, sm.NewStats("key_value"), false, false, "", nil)}
+	limits := []*config.RateLimit{config.NewRateLimit(10, pb.RateLimitResponse_RateLimit_SECOND, sm.NewStats("key_value"), false, false, "", nil, false)}
 	limitInfo := limiter.NewRateLimitInfo(limits[0], 2, 6, 9, 10)
 	responseStatus := baseRateLimit.GetResponseDescriptorStatus("key", limitInfo, false, 1)
 	assert.Equal(pb.RateLimitResponse_OK, responseStatus.GetCode())
@@ -218,7 +218,7 @@ func TestGetResponseStatusBelowLimitShadowMode(t *testing.T) {
 	statsStore := stats.NewStore(stats.NewNullSink(), false)
 	sm := mockstats.NewMockStatManager(statsStore)
 	baseRateLimit := limiter.NewBaseRateLimit(timeSource, nil, 3600, nil, 0.8, "", sm)
-	limits := []*config.RateLimit{config.NewRateLimit(10, pb.RateLimitResponse_RateLimit_SECOND, sm.NewStats("key_value"), false, true, "", nil)}
+	limits := []*config.RateLimit{config.NewRateLimit(10, pb.RateLimitResponse_RateLimit_SECOND, sm.NewStats("key_value"), false, true, "", nil, false)}
 	limitInfo := limiter.NewRateLimitInfo(limits[0], 2, 6, 9, 10)
 	responseStatus := baseRateLimit.GetResponseDescriptorStatus("key", limitInfo, false, 1)
 	assert.Equal(pb.RateLimitResponse_OK, responseStatus.GetCode())