From fa67ec698e9863d6f06accbe4a95ca700303a845 Mon Sep 17 00:00:00 2001 From: Frederic Branczyk Date: Thu, 21 Sep 2023 14:58:35 +0200 Subject: [PATCH] pkg/query: Improve filter performance MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 1) Only lowercase and convert filter string to bytes once. 2) Run filter on dictionary only and then match indices. 3) Run filter on all lines of a sample's locations at once to exit loop earlier. Filtering itself is 52% faster. ``` $ benchstat old.txt new.txt name old time/op new time/op delta FilterProfileData-10 0.05ns ± 1% 0.02ns ± 4% -52.41% (p=0.000 n=10+10) ``` And requests that filter are about 17% faster in total. ``` $ benchstat old.txt new.txt name old time/op new time/op delta ColumnQueryAPIQueryFiltered-10 136ms ± 3% 112ms ± 4% -17.62% (p=0.008 n=5+5) ``` --- pkg/query/columnquery.go | 33 +++++++++++++++++++++++---------- 1 file changed, 23 insertions(+), 10 deletions(-) diff --git a/pkg/query/columnquery.go b/pkg/query/columnquery.go index 245cb888884..1a43b3de367 100644 --- a/pkg/query/columnquery.go +++ b/pkg/query/columnquery.go @@ -279,13 +279,13 @@ func FilterProfileData( // We want to filter by function name case-insensitive, so we need to lowercase the query. // We lower case the query here, so we don't have to do it for every sample. 
- filterQuery = strings.ToLower(filterQuery) + filterQueryBytes := []byte(strings.ToLower(filterQuery)) res := make([]arrow.Record, 0, len(records)) allValues := int64(0) allFiltered := int64(0) for _, r := range records { - filteredRecord, valueSum, filteredSum, err := filterRecord(ctx, tracer, pool, r, filterQuery) + filteredRecord, valueSum, filteredSum, err := filterRecord(ctx, tracer, pool, r, filterQueryBytes) if err != nil { return nil, 0, fmt.Errorf("filter record: %w", err) } @@ -303,7 +303,7 @@ func filterRecord( tracer trace.Tracer, pool memory.Allocator, rec arrow.Record, - filterQuery string, + filterQueryBytes []byte, ) (arrow.Record, int64, int64, error) { r := profile.NewRecordReader(rec) @@ -316,16 +316,29 @@ func filterRecord( w := profile.NewWriter(pool, labelNames) defer w.RecordBuilder.Release() + indexMatches := map[uint32]struct{}{} + for i := 0; i < r.LineFunctionNameDict.Len(); i++ { + if bytes.Contains(bytes.ToLower(r.LineFunctionNameDict.Value(i)), filterQueryBytes) { + indexMatches[uint32(i)] = struct{}{} + } + } + + if len(indexMatches) == 0 { + return w.RecordBuilder.NewRecord(), math.Int64.Sum(r.Value), 0, nil + } + for i := 0; i < int(rec.NumRows()); i++ { lOffsetStart, lOffsetEnd := r.Locations.ValueOffsets(i) keepRow := false - for j := int(lOffsetStart); j < int(lOffsetEnd); j++ { - llOffsetStart, llOffsetEnd := r.Lines.ValueOffsets(j) - - for k := int(llOffsetStart); k < int(llOffsetEnd); k++ { - if r.LineFunctionNameIndices.IsValid(k) && bytes.Contains(bytes.ToLower(r.LineFunctionNameDict.Value(int(r.LineFunctionNameIndices.Value(k)))), []byte(filterQuery)) { - keepRow = true - break + if lOffsetStart < lOffsetEnd { + firstStart, _ := r.Lines.ValueOffsets(int(lOffsetStart)) + _, lastEnd := r.Lines.ValueOffsets(int(lOffsetEnd - 1)) + for k := int(firstStart); k < int(lastEnd); k++ { + if r.LineFunctionNameIndices.IsValid(k) { + if _, ok := indexMatches[r.LineFunctionNameIndices.Value(k)]; ok { + keepRow = true + break + } } } }