Use CacheKeyGenerator implementation in error caching middleware (#9644)

Use an implementation of the CacheKeyGenerator interface instead of bare function to generate cache keys for error caching. This allows downstream consumers (GEM) to inject custom logic for caching based on LBAC rules. Signed-off-by: Nick Pillitteri <[email protected]>
grafana · Oct 17, 2024 · 4377b80 · 4377b80
1 parent 9de001e
commit 4377b80
Show file tree

Hide file tree

Showing 5 changed files with 22 additions and 17 deletions.
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -49,6 +49,7 @@
 * [BUGFIX] Fix issue where negation of native histograms (eg. `-some_native_histogram_series`) did nothing. #9508
 * [BUGFIX] Fix issue where `metric might not be a counter, name does not end in _total/_sum/_count/_bucket` annotation would be emitted even if `rate` or `increase` did not have enough samples to compute a result. #9508
 * [BUGFIX] Fix issue where sharded queries could return annotations with incorrect or confusing position information. #9536
+* [BUGFIX] Fix issue where downstream consumers may not generate correct cache keys for experimental error caching. #9644
 
 ### Mixin
 

diff --git a/pkg/frontend/querymiddleware/error_caching.go b/pkg/frontend/querymiddleware/error_caching.go
@@ -5,7 +5,6 @@ package querymiddleware
 import (
 	"context"
 	"errors"
-	"fmt"
 	"time"
 
 	"github.com/go-kit/log"
@@ -26,7 +25,7 @@ const (
 	reasonNotCacheableError = "not-cacheable-api-error"
 )
 
-func newErrorCachingMiddleware(cache cache.Cache, limits Limits, shouldCacheReq shouldCacheFn, logger log.Logger, reg prometheus.Registerer) MetricsQueryMiddleware {
+func newErrorCachingMiddleware(cache cache.Cache, limits Limits, shouldCacheReq shouldCacheFn, keyGen CacheKeyGenerator, logger log.Logger, reg prometheus.Registerer) MetricsQueryMiddleware {
 	cacheLoadAttempted := promauto.With(reg).NewCounter(prometheus.CounterOpts{
 		Name: "cortex_frontend_query_error_cache_requests_total",
 		Help: "Number of requests that check the results cache for an error.",
@@ -50,6 +49,7 @@ func newErrorCachingMiddleware(cache cache.Cache, limits Limits, shouldCacheReq
 			cache:               cache,
 			limits:              limits,
 			shouldCacheReq:      shouldCacheReq,
+			keyGen:              keyGen,
 			logger:              logger,
 			cacheLoadAttempted:  cacheLoadAttempted,
 			cacheLoadHits:       cacheLoadHits,
@@ -64,6 +64,7 @@ type errorCachingHandler struct {
 	cache          cache.Cache
 	limits         Limits
 	shouldCacheReq shouldCacheFn
+	keyGen         CacheKeyGenerator
 	logger         log.Logger
 
 	cacheLoadAttempted  prometheus.Counter
@@ -85,7 +86,7 @@ func (e *errorCachingHandler) Do(ctx context.Context, request MetricsQueryReques
 	}
 
 	e.cacheLoadAttempted.Inc()
-	key := errorCachingKey(tenant.JoinTenantIDs(tenantIDs), request)
+	key := e.keyGen.QueryRequestError(ctx, tenant.JoinTenantIDs(tenantIDs), request)
 	hashedKey := cacheHashKey(key)
 
 	if cachedErr := e.loadErrorFromCache(ctx, key, hashedKey, spanLog); cachedErr != nil {
@@ -176,9 +177,3 @@ func (e *errorCachingHandler) isCacheable(apiErr *apierror.APIError) (bool, stri
 
 	return true, ""
 }
-
-// errorCachingKey returns the key for caching and error query response. Standalone function
-// to allow for easier testing.
-func errorCachingKey(tenantID string, r MetricsQueryRequest) string {
-	return fmt.Sprintf("EC:%s:%s:%d:%d:%d", tenantID, r.GetQuery(), r.GetStart(), r.GetEnd(), r.GetStep())
-}
diff --git a/pkg/frontend/querymiddleware/error_caching_test.go b/pkg/frontend/querymiddleware/error_caching_test.go
@@ -20,6 +20,8 @@ import (
 )
 
 func TestErrorCachingHandler_Do(t *testing.T) {
+	keyGen := NewDefaultCacheKeyGenerator(newTestPrometheusCodec(), time.Second)
+
 	newDefaultRequest := func() *PrometheusRangeQueryRequest {
 		return &PrometheusRangeQueryRequest{
 			queryExpr: parseQuery(t, "up"),
@@ -31,7 +33,7 @@ func TestErrorCachingHandler_Do(t *testing.T) {
 
 	runHandler := func(ctx context.Context, inner MetricsQueryHandler, c cache.Cache, req MetricsQueryRequest) (Response, error) {
 		limits := &mockLimits{resultsCacheTTLForErrors: time.Minute}
-		middleware := newErrorCachingMiddleware(c, limits, resultsCacheEnabledByOption, test.NewTestingLogger(t), prometheus.NewPedanticRegistry())
+		middleware := newErrorCachingMiddleware(c, limits, resultsCacheEnabledByOption, keyGen, test.NewTestingLogger(t), prometheus.NewPedanticRegistry())
 		handler := middleware.Wrap(inner)
 		return handler.Do(ctx, req)
 	}
@@ -80,7 +82,7 @@ func TestErrorCachingHandler_Do(t *testing.T) {
 
 		ctx := user.InjectOrgID(context.Background(), "1234")
 		req := newDefaultRequest()
-		key := errorCachingKey("1234", req)
+		key := keyGen.QueryRequestError(ctx, "1234", req)
 		bytes, err := proto.Marshal(&CachedError{
 			Key:          key,
 			ErrorType:    string(apierror.TypeExec),
@@ -108,7 +110,7 @@ func TestErrorCachingHandler_Do(t *testing.T) {
 		ctx := user.InjectOrgID(context.Background(), "1234")
 		req := newDefaultRequest()
 
-		key := errorCachingKey("1234", req)
+		key := keyGen.QueryRequestError(ctx, "1234", req)
 		bytes, err := proto.Marshal(&CachedError{
 			Key:          "different key that is stored under the same hashed key",
 			ErrorType:    string(apierror.TypeExec),
@@ -136,7 +138,7 @@ func TestErrorCachingHandler_Do(t *testing.T) {
 		ctx := user.InjectOrgID(context.Background(), "1234")
 		req := newDefaultRequest()
 
-		key := errorCachingKey("1234", req)
+		key := keyGen.QueryRequestError(ctx, "1234", req)
 		bytes := []byte{0x0, 0x0, 0x0, 0x0}
 
 		// NOTE: We rely on this mock cache being synchronous

diff --git a/pkg/frontend/querymiddleware/results_cache.go b/pkg/frontend/querymiddleware/results_cache.go
@@ -192,6 +192,9 @@ type CacheKeyGenerator interface {
 	// QueryRequest should generate a cache key based on the tenant ID and MetricsQueryRequest.
 	QueryRequest(ctx context.Context, tenantID string, r MetricsQueryRequest) string
 
+	// QueryRequestError should generate a cache key based on errors for the tenant ID and MetricsQueryRequest.
+	QueryRequestError(ctx context.Context, tenantID string, r MetricsQueryRequest) string
+
 	// LabelValues should return a cache key for a label values request. The cache key does not need to contain the tenant ID.
 	// LabelValues can return ErrUnsupportedRequest, in which case the response won't be treated as an error, but the item will still not be cached.
 	// LabelValues should return a nil *GenericQueryCacheKey when it returns an error and
@@ -219,16 +222,20 @@ func NewDefaultCacheKeyGenerator(codec Codec, interval time.Duration) DefaultCac
 }
 
 // QueryRequest generates a cache key based on the userID, MetricsQueryRequest and interval.
-func (g DefaultCacheKeyGenerator) QueryRequest(_ context.Context, userID string, r MetricsQueryRequest) string {
+func (g DefaultCacheKeyGenerator) QueryRequest(_ context.Context, tenantID string, r MetricsQueryRequest) string {
 	startInterval := r.GetStart() / g.interval.Milliseconds()
 	stepOffset := r.GetStart() % r.GetStep()
 
 	// Use original format for step-aligned request, so that we can use existing cached results for such requests.
 	if stepOffset == 0 {
-		return fmt.Sprintf("%s:%s:%d:%d", userID, r.GetQuery(), r.GetStep(), startInterval)
+		return fmt.Sprintf("%s:%s:%d:%d", tenantID, r.GetQuery(), r.GetStep(), startInterval)
 	}
 
-	return fmt.Sprintf("%s:%s:%d:%d:%d", userID, r.GetQuery(), r.GetStep(), startInterval, stepOffset)
+	return fmt.Sprintf("%s:%s:%d:%d:%d", tenantID, r.GetQuery(), r.GetStep(), startInterval, stepOffset)
+}
+
+func (g DefaultCacheKeyGenerator) QueryRequestError(_ context.Context, tenantID string, r MetricsQueryRequest) string {
+	return fmt.Sprintf("EC:%s:%s:%d:%d:%d", tenantID, r.GetQuery(), r.GetStart(), r.GetEnd(), r.GetStep())
 }
 
 // shouldCacheFn checks whether the current request should go to cache

diff --git a/pkg/frontend/querymiddleware/roundtrip.go b/pkg/frontend/querymiddleware/roundtrip.go
@@ -357,7 +357,7 @@ func newQueryMiddlewares(
 		queryRangeMiddleware = append(
 			queryRangeMiddleware,
 			newInstrumentMiddleware("error_caching", metrics),
-			newErrorCachingMiddleware(cacheClient, limits, resultsCacheEnabledByOption, log, registerer),
+			newErrorCachingMiddleware(cacheClient, limits, resultsCacheEnabledByOption, cacheKeyGenerator, log, registerer),
 		)
 	}