caching bucket for parquet chunks file #6805

Merged 4 commits on Jun 10, 2025

1 change: 1 addition & 0 deletions CHANGELOG.md
@@ -38,6 +38,7 @@
* [ENHANCEMENT] Ingester: Handle runtime errors in query path #6769
* [ENHANCEMENT] Compactor: Support metadata caching bucket for Cleaner. Can be enabled via `-compactor.cleaner-caching-bucket-enabled` flag. #6778
* [ENHANCEMENT] Compactor, Store Gateway: Introduce user scanner strategy and user index. #6780
* [ENHANCEMENT] Querier: Support chunks cache for parquet queryable. #6805
* [BUGFIX] Ingester: Avoid error or early throttling when READONLY ingesters are present in the ring #6517
* [BUGFIX] Ingester: Fix labelset data race condition. #6573
* [BUGFIX] Compactor: Cleaner should not put deletion marker for blocks with no-compact marker. #6576
2 changes: 1 addition & 1 deletion go.mod
@@ -82,7 +82,7 @@ require (
github.com/hashicorp/golang-lru/v2 v2.0.7
github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822
github.com/parquet-go/parquet-go v0.25.1
github.com/prometheus-community/parquet-common v0.0.0-20250606162055-b81ebb7e1b96
github.com/prometheus-community/parquet-common v0.0.0-20250610002942-dfd72bae1309
github.com/prometheus/procfs v0.16.1
github.com/sercand/kuberesolver/v5 v5.1.1
github.com/tjhop/slog-gokit v0.1.4
4 changes: 2 additions & 2 deletions go.sum
@@ -1543,8 +1543,8 @@ github.com/posener/complete v1.1.1/go.mod h1:em0nMJCgc9GFtwrmVmEMR/ZL6WyhyjMBndr
github.com/posener/complete v1.2.3/go.mod h1:WZIdtGGp+qx0sLrYKtIRAruyNpv6hFCicSgv7Sy7s/s=
github.com/prashantv/gostub v1.1.0 h1:BTyx3RfQjRHnUWaGF9oQos79AlQ5k8WNktv7VGvVH4g=
github.com/prashantv/gostub v1.1.0/go.mod h1:A5zLQHz7ieHGG7is6LLXLz7I8+3LZzsrV0P1IAHhP5U=
github.com/prometheus-community/parquet-common v0.0.0-20250606162055-b81ebb7e1b96 h1:5EbDNJOxTWGpe6yzXdgcBCU63BRSrRAh0Q1oB5AVyoA=
github.com/prometheus-community/parquet-common v0.0.0-20250606162055-b81ebb7e1b96/go.mod h1:MwYpD+FKot7LWBMFaPS6FeM8oqo77u5erRlNkSSFPA0=
github.com/prometheus-community/parquet-common v0.0.0-20250610002942-dfd72bae1309 h1:xGnXldBSTFPopLYi7ce+kJb+A1h1mPTeF4SLlRTEek0=
github.com/prometheus-community/parquet-common v0.0.0-20250610002942-dfd72bae1309/go.mod h1:MwYpD+FKot7LWBMFaPS6FeM8oqo77u5erRlNkSSFPA0=
github.com/prometheus-community/prom-label-proxy v0.11.0 h1:IO02WiiFMfcIqvjhwMbCYnDJiTNcSHBrkCGRQ/7KDd0=
github.com/prometheus-community/prom-label-proxy v0.11.0/go.mod h1:lfvrG70XqsxWDrSh1843QXBG0fSg8EbIXmAo8xGsvw8=
github.com/prometheus/alertmanager v0.28.1 h1:BK5pCoAtaKg01BYRUJhEDV1tqJMEtYBGzPw8QdvnnvA=
13 changes: 2 additions & 11 deletions pkg/querier/blocks_store_queryable.go
@@ -186,19 +186,10 @@ func NewBlocksStoreQueryable(
func NewBlocksStoreQueryableFromConfig(querierCfg Config, gatewayCfg storegateway.Config, storageCfg cortex_tsdb.BlocksStorageConfig, limits BlocksStoreLimits, logger log.Logger, reg prometheus.Registerer) (*BlocksStoreQueryable, error) {
var stores BlocksStoreSet

bucketClient, err := bucket.NewClient(context.Background(), storageCfg.Bucket, gatewayCfg.HedgedRequest.GetHedgedRoundTripper(), "querier", logger, reg)
bucketClient, err := createCachingBucketClient(context.Background(), storageCfg, gatewayCfg.HedgedRequest.GetHedgedRoundTripper(), "querier", logger, reg)
if err != nil {
return nil, errors.Wrap(err, "failed to create bucket client")
return nil, err
}

// Blocks finder doesn't use chunks, but we pass config for consistency.
matchers := cortex_tsdb.NewMatchers()
cachingBucket, err := cortex_tsdb.CreateCachingBucket(storageCfg.BucketStore.ChunksCache, storageCfg.BucketStore.MetadataCache, matchers, bucketClient, logger, extprom.WrapRegistererWith(prometheus.Labels{"component": "querier"}, reg))
if err != nil {
return nil, errors.Wrap(err, "create caching bucket")
}
bucketClient = cachingBucket

// Create the blocks finder.
var finder BlocksFinder
if storageCfg.BucketStore.BucketIndex.Enabled {
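
(The caching-bucket setup removed here is not lost; it moves into the shared createCachingBucketClient helper added in pkg/querier/bucket.go below.)
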
31 changes: 31 additions & 0 deletions pkg/querier/bucket.go
@@ -0,0 +1,31 @@
package querier

import (
"context"
"net/http"

"github.com/go-kit/log"
"github.com/pkg/errors"
"github.com/prometheus/client_golang/prometheus"
"github.com/thanos-io/objstore"
"github.com/thanos-io/thanos/pkg/extprom"

"github.com/cortexproject/cortex/pkg/storage/bucket"
cortex_tsdb "github.com/cortexproject/cortex/pkg/storage/tsdb"
)

func createCachingBucketClient(ctx context.Context, storageCfg cortex_tsdb.BlocksStorageConfig, hedgedRoundTripper func(rt http.RoundTripper) http.RoundTripper, name string, logger log.Logger, reg prometheus.Registerer) (objstore.InstrumentedBucket, error) {
bucketClient, err := bucket.NewClient(ctx, storageCfg.Bucket, hedgedRoundTripper, name, logger, reg)
if err != nil {
return nil, errors.Wrap(err, "failed to create bucket client")
}

// Blocks finder doesn't use chunks, but we pass config for consistency.
matchers := cortex_tsdb.NewMatchers()
cachingBucket, err := cortex_tsdb.CreateCachingBucket(storageCfg.BucketStore.ChunksCache, storageCfg.BucketStore.MetadataCache, matchers, bucketClient, logger, extprom.WrapRegistererWith(prometheus.Labels{"component": name}, reg))
if err != nil {
return nil, errors.Wrap(err, "create caching bucket")
}
bucketClient = cachingBucket
return bucketClient, nil
}
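
To make the layering concrete, below is a toy sketch of the decorator this helper sets up; it is an illustration only, not the Cortex/Thanos caching-bucket code, and all names in it are made up. The raw client from bucket.NewClient is wrapped so that byte-range reads of objects selected by a matcher (for example, parquet chunks files) are served from cache on repeat access.

```go
package main

import "fmt"

type getRanger interface {
	GetRange(name string, off, length int64) ([]byte, error)
}

// rawBucket stands in for the client returned by bucket.NewClient.
type rawBucket struct {
	objects map[string][]byte
	reads   int
}

func (b *rawBucket) GetRange(name string, off, length int64) ([]byte, error) {
	b.reads++
	return b.objects[name][off : off+length], nil
}

// cachingBucket stands in for the wrapper returned by CreateCachingBucket:
// ranges of objects selected by a matcher are memoised.
type cachingBucket struct {
	next    getRanger
	matcher func(string) bool
	cache   map[string][]byte
}

func (b *cachingBucket) GetRange(name string, off, length int64) ([]byte, error) {
	if !b.matcher(name) {
		return b.next.GetRange(name, off, length)
	}
	key := fmt.Sprintf("%s:%d:%d", name, off, length)
	if v, ok := b.cache[key]; ok {
		return v, nil // served from cache, no object-store request
	}
	v, err := b.next.GetRange(name, off, length)
	if err == nil {
		b.cache[key] = v
	}
	return v, err
}

func main() {
	raw := &rawBucket{objects: map[string][]byte{"tenant-1/block/chunks.parquet": make([]byte, 1024)}}
	cached := &cachingBucket{
		next:    raw,
		matcher: func(name string) bool { return true }, // e.g. the parquet chunks matcher
		cache:   map[string][]byte{},
	}

	if _, err := cached.GetRange("tenant-1/block/chunks.parquet", 0, 128); err != nil {
		panic(err)
	}
	if _, err := cached.GetRange("tenant-1/block/chunks.parquet", 0, 128); err != nil {
		panic(err)
	}
	fmt.Println("object-store reads:", raw.reads) // prints 1: the second read was a cache hit
}
```
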
6 changes: 3 additions & 3 deletions pkg/querier/parquet_queryable.go
@@ -115,11 +115,11 @@ func NewParquetQueryable(
logger log.Logger,
reg prometheus.Registerer,
) (storage.Queryable, error) {
bucketClient, err := bucket.NewClient(context.Background(), storageCfg.Bucket, nil, "parquet-querier", logger, reg)

bucketClient, err := createCachingBucketClient(context.Background(), storageCfg, nil, "parquet-querier", logger, reg)
if err != nil {
return nil, err
}

manager, err := services.NewManager(blockStorageQueryable)
if err != nil {
return nil, err
@@ -400,7 +400,7 @@ func (q *parquetQuerierWithFallback) Select(ctx context.Context, sortSeries bool
hints.End = maxt

if maxt < mint {
return nil
return storage.EmptySeriesSet()
}

remaining, parquet, err := q.getBlocks(ctx, mint, maxt)
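
The second hunk fixes the degenerate case where the clamped range is empty (maxt < mint): Select must return a storage.SeriesSet, and callers iterate the result directly, so a bare nil would panic on the first Next() call, while storage.EmptySeriesSet() satisfies the iterator contract. A minimal, standalone sketch of that contract (not part of the PR):

```go
package main

import (
	"fmt"

	"github.com/prometheus/prometheus/storage"
)

func main() {
	// EmptySeriesSet is a valid, empty iterator: Next() reports false and
	// Err() is nil, so callers can iterate the result without a nil check.
	ss := storage.EmptySeriesSet()

	n := 0
	for ss.Next() {
		n++
		_ = ss.At().Labels()
	}
	fmt.Printf("series=%d err=%v warnings=%v\n", n, ss.Err(), ss.Warnings())
}
```
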
12 changes: 12 additions & 0 deletions pkg/storage/tsdb/caching_bucket.go
@@ -215,6 +215,7 @@ func CreateCachingBucket(chunksConfig ChunksCacheConfig, metadataConfig Metadata
cachingConfigured = true
chunksCache = cache.NewTracingCache(chunksCache)
cfg.CacheGetRange("chunks", chunksCache, matchers.GetChunksMatcher(), chunksConfig.SubrangeSize, chunksConfig.AttributesTTL, chunksConfig.SubrangeTTL, chunksConfig.MaxGetRangeRequests)
cfg.CacheGetRange("parquet-chunks", chunksCache, matchers.GetParquetChunksMatcher(), chunksConfig.SubrangeSize, chunksConfig.AttributesTTL, chunksConfig.SubrangeTTL, chunksConfig.MaxGetRangeRequests)
}

metadataCache, err := createMetadataCache("metadata-cache", &metadataConfig.MetadataCacheBackend, logger, reg)
@@ -356,6 +357,7 @@ type Matchers struct {
func NewMatchers() Matchers {
matcherMap := make(map[string]func(string) bool)
matcherMap["chunks"] = isTSDBChunkFile
matcherMap["parquet-chunks"] = isParquetChunkFile
matcherMap["metafile"] = isMetaFile
matcherMap["block-index"] = isBlockIndexFile
matcherMap["bucket-index"] = isBucketIndexFiles
@@ -375,6 +377,10 @@ func (m *Matchers) SetChunksMatcher(f func(string) bool) {
m.matcherMap["chunks"] = f
}

func (m *Matchers) SetParquetChunksMatcher(f func(string) bool) {
m.matcherMap["parquet-chunks"] = f
}

func (m *Matchers) SetBlockIndexMatcher(f func(string) bool) {
m.matcherMap["block-index"] = f
}
@@ -399,6 +405,10 @@ func (m *Matchers) GetChunksMatcher() func(string) bool {
return m.matcherMap["chunks"]
}

func (m *Matchers) GetParquetChunksMatcher() func(string) bool {
return m.matcherMap["parquet-chunks"]
}

func (m *Matchers) GetMetafileMatcher() func(string) bool {
return m.matcherMap["metafile"]
}
@@ -427,6 +437,8 @@ var chunksMatcher = regexp.MustCompile(`^.*/chunks/\d+$`)

func isTSDBChunkFile(name string) bool { return chunksMatcher.MatchString(name) }

func isParquetChunkFile(name string) bool { return strings.HasSuffix(name, "chunks.parquet") }

func isMetaFile(name string) bool {
return strings.HasSuffix(name, "/"+metadata.MetaFilename) || strings.HasSuffix(name, "/"+metadata.DeletionMarkFilename) || strings.HasSuffix(name, "/"+TenantDeletionMarkFile)
}
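
As a quick check of which object names the new "parquet-chunks" matcher selects, here is a standalone sketch; the two matcher bodies are copied from the hunk above, and the example object paths are made up for illustration.

```go
package main

import (
	"fmt"
	"regexp"
	"strings"
)

// Copied from the diff above: the existing TSDB chunk matcher and the new
// parquet chunk matcher registered under "parquet-chunks".
var chunksMatcher = regexp.MustCompile(`^.*/chunks/\d+$`)

func isTSDBChunkFile(name string) bool    { return chunksMatcher.MatchString(name) }
func isParquetChunkFile(name string) bool { return strings.HasSuffix(name, "chunks.parquet") }

func main() {
	// Hypothetical object names, for illustration only.
	for _, name := range []string{
		"tenant-1/01ABCDEF/chunks/000001",  // TSDB chunk segment -> "chunks" matcher
		"tenant-1/01ABCDEF/chunks.parquet", // parquet chunks file -> "parquet-chunks" matcher
		"tenant-1/01ABCDEF/labels.parquet", // matched by neither, so not range-cached here
	} {
		fmt.Printf("%-35s tsdb=%-5v parquet=%v\n", name, isTSDBChunkFile(name), isParquetChunkFile(name))
	}
}
```
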

Some generated files are not rendered by default.

2 changes: 1 addition & 1 deletion vendor/modules.txt

Some generated files are not rendered by default.