From 13cf2c280c89f75fdf9d448f78b18100cf1de360 Mon Sep 17 00:00:00 2001 From: Shankar Nair Date: Tue, 29 Oct 2024 11:34:18 -0700 Subject: [PATCH 1/8] Support tagfiltertree for fast matching metricIDs to queries --- src/metrics/tagfiltertree/README.md | 22 + src/metrics/tagfiltertree/options.go | 28 + src/metrics/tagfiltertree/pointer_set.go | 38 ++ src/metrics/tagfiltertree/pointer_set_test.go | 61 ++ src/metrics/tagfiltertree/tag_filter_tree.go | 327 +++++++++++ .../tag_filter_tree_benchmark_test.go | 110 ++++ .../tagfiltertree/tag_filter_tree_test.go | 530 ++++++++++++++++++ src/metrics/tagfiltertree/trie.go | 347 ++++++++++++ src/metrics/tagfiltertree/trie_char.go | 23 + src/metrics/tagfiltertree/trie_children.go | 50 ++ .../tagfiltertree/trie_children_test.go | 31 + src/metrics/tagfiltertree/trie_test.go | 416 ++++++++++++++ 12 files changed, 1983 insertions(+) create mode 100644 src/metrics/tagfiltertree/README.md create mode 100644 src/metrics/tagfiltertree/options.go create mode 100644 src/metrics/tagfiltertree/pointer_set.go create mode 100644 src/metrics/tagfiltertree/pointer_set_test.go create mode 100644 src/metrics/tagfiltertree/tag_filter_tree.go create mode 100644 src/metrics/tagfiltertree/tag_filter_tree_benchmark_test.go create mode 100644 src/metrics/tagfiltertree/tag_filter_tree_test.go create mode 100644 src/metrics/tagfiltertree/trie.go create mode 100644 src/metrics/tagfiltertree/trie_char.go create mode 100644 src/metrics/tagfiltertree/trie_children.go create mode 100644 src/metrics/tagfiltertree/trie_children_test.go create mode 100644 src/metrics/tagfiltertree/trie_test.go diff --git a/src/metrics/tagfiltertree/README.md b/src/metrics/tagfiltertree/README.md new file mode 100644 index 0000000000..2f363552bf --- /dev/null +++ b/src/metrics/tagfiltertree/README.md @@ -0,0 +1,22 @@ +# Tag Filter Tree + +## Motivation +There are many instances where we want to match an input metricID against +a set of tag filters. One such use-case is metric attribution to namespaces. +Iterating through each filter individually and matching them is extremely expensive +since it has to be done on each incoming metricID. Therefore, this data structure +pre-compiles a set of tag filters in order to optimize matches against an input metricID. + +## Usage +First create a trie using New() and then add tagFilters using AddTagFilter(). +The tags within a filter can be specified in any order but to condense the compiled +output of the trie, try and specify the most common set of tags in the beginning +and in the same order. +For instance, in case you have a tag "service" which you anticipate to be present +in all filters then make sure that is specified first and then specify the remaining tags +in the filter. +The trie also supports "*" for a tag value which can be used to ensure the existance of a tag +in the input metricID. + +## Caveats +The trie might return duplicates and it is up to the caller to de-dup the results. diff --git a/src/metrics/tagfiltertree/options.go b/src/metrics/tagfiltertree/options.go new file mode 100644 index 0000000000..43d048c140 --- /dev/null +++ b/src/metrics/tagfiltertree/options.go @@ -0,0 +1,28 @@ +package tagfiltertree + +import "github.com/m3db/m3/src/metrics/filters" + +// Options is a set of options for the attributor. +type Options interface { + TagFilterOptions() filters.TagsFilterOptions + SetTagFilterOptions(tf filters.TagsFilterOptions) Options +} + +type options struct { + tagFilterOptions filters.TagsFilterOptions +} + +// NewOptions creates a new set of options. +func NewOptions() Options { + return &options{} +} + +func (o *options) TagFilterOptions() filters.TagsFilterOptions { + return o.tagFilterOptions +} + +func (o *options) SetTagFilterOptions(tf filters.TagsFilterOptions) Options { + opts := *o + opts.tagFilterOptions = tf + return &opts +} diff --git a/src/metrics/tagfiltertree/pointer_set.go b/src/metrics/tagfiltertree/pointer_set.go new file mode 100644 index 0000000000..df178bd0bf --- /dev/null +++ b/src/metrics/tagfiltertree/pointer_set.go @@ -0,0 +1,38 @@ +package tagfiltertree + +import "math/bits" + +type PointerSet struct { + bits [2]uint64 // Using 2 uint64 gives us 128 bits (0 to 127). +} + +// Set adds a pointer at index i (0 <= i < 127). +func (ps *PointerSet) Set(i byte) { + if i < 64 { + ps.bits[0] |= (1 << i) + } else { + ps.bits[1] |= (1 << (i - 64)) + } +} + +// IsSet checks if a pointer is present at index i. +func (ps *PointerSet) IsSet(i byte) bool { + if i < 64 { + return ps.bits[0]&(1<>= 1 + } else { + if r&0x1 == 1 { + ps.Set(i) + } + r >>= 1 + } + } + + require.Equal(t, tt.expected, ps.CountSetBitsUntil(127)) + }) + } +} diff --git a/src/metrics/tagfiltertree/tag_filter_tree.go b/src/metrics/tagfiltertree/tag_filter_tree.go new file mode 100644 index 0000000000..ce22d69d12 --- /dev/null +++ b/src/metrics/tagfiltertree/tag_filter_tree.go @@ -0,0 +1,327 @@ +package tagfiltertree + +import ( + "fmt" + "strings" +) + +const ( + _matchall = "*" + _matchNone = "!*" +) + +type Tag struct { + Name string + Val string + Var string +} + +// Resolvable is an interface for types that can be stored in the tree. +type Resolvable[R any] interface { + // Resolve resolves the data based on the tags and returns + // the data with type R. + Resolve(tags map[string]string) (R, error) +} + +// Tree is a tree data structure for tag filters. +type Tree[T any] struct { + Nodes []*node[T] +} + +type NodeValue[T any] struct { + Val string + PatternTrie *Trie[T] + Tree *Tree[T] + Data []T +} + +type node[T any] struct { + Name string + AbsoluteValues map[string]NodeValue[T] + PatternValues []NodeValue[T] +} + +// New creates a new tree. +func New[T any]() *Tree[T] { + return &Tree[T]{ + Nodes: make([]*node[T], 0), + } +} + +// AddTagFilter adds a tag filter to the tree. +func (t *Tree[T]) AddTagFilter(tagFilter string, data T) error { + tags, err := TagsFromTagFilter(tagFilter) + if err != nil { + return err + } + return addNode(t, tags, 0, data) +} + +func (n *node[T]) addValue(filter string, data *T) (*Tree[T], error) { + if IsAbsoluteValue(filter) { + if n.AbsoluteValues == nil { + n.AbsoluteValues = make(map[string]NodeValue[T], 0) + } + if v, found := n.AbsoluteValues[filter]; found { + if data != nil { + if v.Data == nil { + v.Data = make([]T, 0) + } + v.Data = append(v.Data, *data) + n.AbsoluteValues[filter] = v + } + return v.Tree, nil + } + + newNodeValue := NewNodeValue[T](filter, nil, data) + n.AbsoluteValues[filter] = newNodeValue + + return newNodeValue.Tree, nil + } + + if n.PatternValues == nil { + n.PatternValues = make([]NodeValue[T], 0) + } + for i, v := range n.PatternValues { + if v.Val == filter { + if data != nil { + if v.Data == nil { + v.Data = make([]T, 0) + } + v.Data = append(v.Data, *data) + n.PatternValues[i] = v + } + return v.Tree, nil + } + } + + t := NewTrie[T]() + err := t.Insert(filter, nil) + if err != nil { + return nil, err + } + + newNodeValue := NewNodeValue[T](filter, t, data) + n.PatternValues = append(n.PatternValues, newNodeValue) + + return newNodeValue.Tree, nil +} + +func NewNodeValue[T any](val string, patternTrie *Trie[T], data *T) NodeValue[T] { + t := &Tree[T]{ + Nodes: make([]*node[T], 0), + } + + v := NodeValue[T]{ + Val: val, + PatternTrie: patternTrie, + Tree: t, + Data: nil, + } + + if data != nil { + if v.Data == nil { + v.Data = make([]T, 0) + } + v.Data = append(v.Data, *data) + } + + return v +} + +func addNode[T any](t *Tree[T], tags []Tag, idx int, data T) error { + if idx >= len(tags) { + return nil + } + + tag := tags[idx] + nodeIdx := -1 + for i, t := range t.Nodes { + if t.Name == tag.Name { + nodeIdx = i + break + } + } + if nodeIdx == -1 { + t.Nodes = append(t.Nodes, &node[T]{ + Name: tag.Name, + }) + nodeIdx = len(t.Nodes) - 1 + } + node := t.Nodes[nodeIdx] + + // Add the data to the childTree if this is the last tag. + var dataToAdd *T + if idx == len(tags)-1 { + dataToAdd = &data + } + + childTree, err := node.addValue(tag.Val, dataToAdd) + if err != nil { + return err + } + + // Recurse to the next tag. + if err := addNode(childTree, tags, idx+1, data); err != nil { + // TODO: perform cleanup to avoid partially added nodes. + return err + } + + return nil +} + +// Match returns the data for the given tags. +func (t *Tree[T]) Match(tags map[string]string, data *[]T) (bool, error) { + isMatchAny := false + if data == nil || *data == nil { + isMatchAny = true + } + return match(t, tags, data, isMatchAny) + +} + +func match[T any]( + t *Tree[T], + tags map[string]string, + data *[]T, + isMatchAny bool, +) (bool, error) { + if len(tags) == 0 || t == nil { + return false, nil + } + + for _, node := range t.Nodes { + name := node.Name + negate := false + if IsMatchNoneTag(name) { + name = name[1:] + negate = true + } + tagValue, tagNameFound := tags[name] + absVal, absValFound := node.AbsoluteValues[tagValue] + if tagNameFound && absValFound { + if data != nil && *data != nil { + *data = append(*data, absVal.Data...) + } + if isMatchAny && len(absVal.Data) > 0 { + return true, nil + } + + matched, err := match(absVal.Tree, tags, data, isMatchAny) + if err != nil { + return false, err + } + if isMatchAny && matched { + return true, nil + } + } + if tagNameFound != negate { + for _, v := range node.PatternValues { + matched, err := v.PatternTrie.Match(tagValue, nil) + if err != nil { + return false, err + } + if matched { + if data != nil && *data != nil { + *data = append(*data, v.Data...) + } + if isMatchAny && len(v.Data) > 0 { + return true, nil + } + matched, err := match(v.Tree, tags, data, isMatchAny) + if err != nil { + return false, err + } + if isMatchAny && matched { + return true, nil + } + } + } + } + } + + return false, nil +} + +// TagsFromTagFilter creates tags from a tag filter. +// The tag values can be of the format: +// "foo" OR "{foo,bar,baz}" OR "{{Variable}}" +// There cannot be a mix of value formats like "simpleValue, {foo,bar}" etc. +func TagsFromTagFilter(tf string) ([]Tag, error) { + tags := make([]Tag, 0) + tfSanitized := strings.TrimSpace(tf) + tagValuePairs := strings.Split(tfSanitized, " ") + for _, tagValuePair := range tagValuePairs { + tagAndValue := strings.Split(tagValuePair, ":") + if len(tagAndValue) != 2 { + return nil, fmt.Errorf("invalid tag filter: %s", tf) + } + tag := tagAndValue[0] + val := tagAndValue[1] + if len(tag) == 0 || len(val) == 0 { + return nil, fmt.Errorf("invalid tag filter: %s", tf) + } + + varName := "" + if IsVarTagValue(val) { + varName = val + val = _matchall + } + if val == _matchNone { + tag = "!" + tag + val = _matchall + } + + tags = append(tags, Tag{ + Name: tag, + Val: val, + Var: varName, + }) + } + + return tags, nil +} + +func IsVarTagValue(value string) bool { + if len(value) < 4 { + return false + } + + for i := 0; i < len(value); i++ { + if value[i] == '{' { + if i+1 < len(value) && value[i+1] == '{' { + if strings.Contains(value[i+1:], "}}") { + return true + } + } + } + } + + return false +} + +func IsMatchNoneTag(tagName string) bool { + if len(tagName) == 0 { + return false + } + return tagName[0] == '!' +} + +func IsAbsoluteValue(val string) bool { + return !strings.ContainsAny(val, "{!*[") +} + +/* +tag1:value1 tag2:value2 tag3:value3 +tag1:foo1 tag4:* tag8:val* + +tag1 -> value1 ----> + tag2 -> value2 ----> + tag3 -> value3 D-> R1 + foo1 ----> + tag4 -> * ----> + tag8 -> val* D-> R2 + +input: + tag1:foo1 tag4:foobar tag8:value8 +*/ diff --git a/src/metrics/tagfiltertree/tag_filter_tree_benchmark_test.go b/src/metrics/tagfiltertree/tag_filter_tree_benchmark_test.go new file mode 100644 index 0000000000..d8205c9d91 --- /dev/null +++ b/src/metrics/tagfiltertree/tag_filter_tree_benchmark_test.go @@ -0,0 +1,110 @@ +package tagfiltertree + +import ( + "fmt" + "math/rand" + "strings" + "testing" + "time" + + "github.com/influxdata/influxdb/pkg/testing/assert" +) + +const ( + _numFilters = 100 +) + +// Predefined tag names and values for benchmarks +var tags = []string{"tag1", "tag2", "tag3", "tag4", "tag5", "tag6", "tag7", "tag8", "tag9", "tag10"} +var values = []string{"value1", "value2", "value3", "value4", "value5"} + +// Filter Types +const ( + ABSOLUTE = 0 + WILDCARD = 1 + NEGATION = 2 + COMPOSITE = 3 +) + +/* +* Benchmark results: +* BenchmarkTagFilterTreeMatch/MatchAll-10 673851 1561 ns/op 190 B/op 0 allocs/op +* BenchmarkTagFilterTreeMatch/MatchAny-10 4203621 256.0 ns/op 0 B/op 0 allocs/op + */ +func BenchmarkTagFilterTreeMatch(b *testing.B) { + // Generate a benchmark set of tag filters with varying complexity + filters := make([]string, 0) + for i := 0; i < _numFilters; i++ { + filters = append(filters, generateTagFilter(1+rnd.Intn(len(tags)-1))) // 10 filters in the string + } + input := map[string]string{ + "tag1": "value1", + "tag2": "value2", + "tag3": "value3", + "tag4": "value4", + "tag5": "value5", + } + tree := New[*Rule]() + data := &Rule{} + + for _, f := range filters { + err := tree.AddTagFilter(f, data) + assert.NoError(b, err) + } + + b.ResetTimer() + b.Run("MatchAll", func(b *testing.B) { + resultData := make([]*Rule, 0, 10) + for i := 0; i < b.N; i++ { + _, _ = tree.Match(input, &resultData) + } + }) + + b.ResetTimer() + b.Run("MatchAny", func(b *testing.B) { + for i := 0; i < b.N; i++ { + _, _ = tree.Match(input, nil) + } + }) +} + +// Seeded random number generator for deterministic results +var rnd = rand.New(rand.NewSource(time.Now().UnixNano())) + +// Generate a single random tag filter based on type +func generateTagValuePair(idx int, tagType int) string { + tag := tags[idx] + switch tagType { + case ABSOLUTE: + value := values[rnd.Intn(len(values))] + return fmt.Sprintf("%s:%s", tag, value) + case WILDCARD: + return fmt.Sprintf("%s:*", tag) + case NEGATION: + if rnd.Intn(2) == 0 { + return fmt.Sprintf("%s:!*", tag) // tag should not exist + } else { + value := values[rnd.Intn(len(values))] + return fmt.Sprintf("%s:!%s", tag, value) // tag should not have this value + } + case COMPOSITE: + value1 := values[rnd.Intn(len(values))] + value2 := values[rnd.Intn(len(values))] + return fmt.Sprintf("%s:{%s,%s}", tag, value1, value2) // matches either value1 or value2 + default: + return "" + } +} + +// Generate a tag filter with the specified number of tags. +func generateTagFilter(numTags int) string { + var filter []string + + idxs := rnd.Perm(len(tags)) + for i := 0; i < numTags; i++ { + tagType := rnd.Intn(4) // Randomly choose from ABSOLUTE, WILDCARD, NEGATION, COMPOSITE + pair := generateTagValuePair(idxs[i], tagType) + filter = append(filter, pair) + } + return strings.Join(filter, " ") +} diff --git a/src/metrics/tagfiltertree/tag_filter_tree_test.go b/src/metrics/tagfiltertree/tag_filter_tree_test.go new file mode 100644 index 0000000000..75dd608c5a --- /dev/null +++ b/src/metrics/tagfiltertree/tag_filter_tree_test.go @@ -0,0 +1,530 @@ +package tagfiltertree + +import ( + "errors" + "strings" + "testing" + + "github.com/google/go-cmp/cmp" + "github.com/google/go-cmp/cmp/cmpopts" + "github.com/stretchr/testify/require" +) + +type Rule struct { + TagFilters []string + Namespace string +} + +func (r *Rule) Resolve(inputTags map[string]string) (string, error) { + if !IsVarTagValue(r.Namespace) { + return r.Namespace, nil + } + + varMap := make(map[string]string) + for _, tagFilter := range r.TagFilters { + tags, err := TagsFromTagFilter(tagFilter) + if err != nil { + return "", err + } + for _, tag := range tags { + if tag.Var != "" { + // is var tag + inputTagValue, ok := inputTags[tag.Name] + if !ok { + return "", errors.New("tag not found") + } + varMap[tag.Var] = inputTagValue + } + } + } + + ns := r.Namespace + for varName, varValue := range varMap { + ns = strings.ReplaceAll(ns, varName, varValue) + } + return ns, nil +} + +func TestTreeGetData(t *testing.T) { + tests := []struct { + name string + inputTags map[string]string + rules []Rule + expected []string + }{ + { + name: "multiple input tags, multiple filters, multiple rules, match", + inputTags: map[string]string{ + "tag1": "value1", + "tag2": "value2", + "tag3": "value3", + "tag4": "value4", + "tag5": "value5", + }, + rules: []Rule{ + { + TagFilters: []string{ + "tag1:value1 tag2:value2", + }, + Namespace: "namespace1", + }, + { + TagFilters: []string{ + "tag4:value4 tag5:value5", + }, + Namespace: "namespace2", + }, + { + TagFilters: []string{ + "tag5:*", + }, + Namespace: "namespace3", + }, + { + TagFilters: []string{ + "tag8:value8 tag9:value9", + "tag5:value5 tag6:value6", + }, + Namespace: "namespace4", + }, + }, + expected: []string{"namespace1", "namespace2", "namespace3"}, + }, + { + name: "multiple input tags, common prefix tags, match one", + inputTags: map[string]string{ + "tag1": "value1", + "tag2": "value2", + "tag3": "apple", + }, + rules: []Rule{ + { + TagFilters: []string{ + "tag1:value1 tag2:value2 tag3:apple", + }, + Namespace: "namespace1", + }, + { + TagFilters: []string{ + "tag1:value1 tag2:value2 tag3:banana", + }, + Namespace: "namespace2", + }, + }, + expected: []string{"namespace1"}, + }, + { + name: "multiple input tags, common prefix tags, existing value, match one", + inputTags: map[string]string{ + "tag1": "value1", + "tag2": "value2", + "tag3": "apple", + }, + rules: []Rule{ + { + TagFilters: []string{ + "tag1:value1 tag2:value2 tag3:apple tag4:banana", + }, + Namespace: "namespace1", + }, + { + TagFilters: []string{ + "tag1:value1 tag2:value2 tag3:apple", + }, + Namespace: "namespace2", + }, + }, + expected: []string{"namespace2"}, + }, + { + name: "multiple input tags, common prefix tags, existing value with wildcard, match one", + inputTags: map[string]string{ + "tag1": "value1", + "tag2": "value2", + "tag3": "apple", + }, + rules: []Rule{ + { + TagFilters: []string{ + "tag1:value1 tag2:value2 tag3:* tag4:banana", + }, + Namespace: "namespace1", + }, + { + TagFilters: []string{ + "tag1:value1 tag2:value2 tag3:*", + }, + Namespace: "namespace2", + }, + }, + expected: []string{"namespace2"}, + }, + { + name: "multiple input tags, multiple filters, multiple rules, multiple values, match", + inputTags: map[string]string{ + "tag1": "value1", + "tag2": "value2", + "tag3": "value3", + "tag4": "value4", + "tag5": "value5", + }, + rules: []Rule{ + { + TagFilters: []string{ + "tag1:value1 tag2:value2", + }, + Namespace: "namespace1", + }, + { + TagFilters: []string{ + "tag1:val1 tag4:value4 tag5:value5", + }, + Namespace: "namespaceA", + }, + { + TagFilters: []string{ + "tag1:val* tag4:value4 tag5:value5", + }, + Namespace: "namespace2", + }, + { + TagFilters: []string{ + "tag5:*", + }, + Namespace: "namespace3", + }, + { + TagFilters: []string{ + "tag8:value8 tag9:value9", + "tag5:value5 tag6:value6", + }, + Namespace: "namespace4", + }, + }, + expected: []string{"namespace1", "namespace2", "namespace3"}, + }, + { + name: "single rule, all negation, match", + inputTags: map[string]string{ + "tag1": "value1", + "tag2": "value2", + "tag4": "value4", + "tag5": "value5", + }, + rules: []Rule{ + { + TagFilters: []string{ + "tag3:!*", + }, + Namespace: "namespace1", + }, + }, + expected: []string{"namespace1"}, + }, + { + name: "single rule, all negation, no match", + inputTags: map[string]string{ + "tag1": "value1", + "tag2": "value2", + "tag4": "value4", + "tag5": "value5", + }, + rules: []Rule{ + { + TagFilters: []string{ + "tag2:!*", + }, + Namespace: "namespace1", + }, + }, + expected: []string{}, + }, + { + name: "single rule, negation, match", + inputTags: map[string]string{ + "tag1": "value1", + "tag2": "value2", + "tag3": "value3", + "tag4": "value4", + "tag5": "value5", + }, + rules: []Rule{ + { + TagFilters: []string{ + "tag3:!{value1,value2}", + }, + Namespace: "namespace1", + }, + }, + expected: []string{"namespace1"}, + }, + { + name: "multiple rules, negation, single match", + inputTags: map[string]string{ + "tag1": "value1", + "tag2": "value2", + "tag3": "value3", + "tag4": "value4", + "tag5": "value5", + }, + rules: []Rule{ + { + TagFilters: []string{ + "tag1:!* tag2:value2", + }, + Namespace: "namespace1", + }, + { + TagFilters: []string{ + "tag1:value1 tag6:!*", + }, + Namespace: "namespace2", + }, + }, + expected: []string{"namespace2"}, + }, + { + name: "single input tag, single filter, match", + inputTags: map[string]string{ + "tag1": "value1", + }, + rules: []Rule{ + { + TagFilters: []string{ + "tag1:value1", + }, + Namespace: "namespace1", + }, + }, + expected: []string{"namespace1"}, + }, + { + name: "multiple input tags, single filter, match", + inputTags: map[string]string{ + "tag1": "value1", + "tag2": "value2", + "tag3": "value3", + }, + rules: []Rule{ + { + TagFilters: []string{ + "tag1:value1", + }, + Namespace: "namespace1", + }, + }, + expected: []string{"namespace1"}, + }, + { + name: "single input tag, multiple filters, match", + inputTags: map[string]string{ + "tag1": "value1", + }, + rules: []Rule{ + { + TagFilters: []string{ + "tag1:value1", + "tag2:value2", + }, + Namespace: "namespace1", + }, + }, + expected: []string{"namespace1"}, + }, + { + name: "single input tag, multiple filters, no match", + inputTags: map[string]string{ + "tag1": "value1", + }, + rules: []Rule{ + { + TagFilters: []string{ + "tag3:value1", + "tag2:value2", + }, + Namespace: "namespace1", + }, + }, + expected: nil, + }, + { + name: "multiple input tags, multiple filters, match", + inputTags: map[string]string{ + "tag1": "value1", + "tag2": "value2", + "tag3": "value3", + }, + rules: []Rule{ + { + TagFilters: []string{ + "tag1:value1 tag2:value2", + "tag4:value2 tag5:value2", + }, + Namespace: "namespace1", + }, + }, + expected: []string{"namespace1"}, + }, + { + name: "multiple input tags, multiple filters, no match", + inputTags: map[string]string{ + "tag3": "value3", + "tag4": "value4", + }, + rules: []Rule{ + { + TagFilters: []string{ + "tag1:value1 tag2:value2", + "tag5:value5 tag6:value6", + }, + Namespace: "namespace1", + }, + { + TagFilters: []string{ + "tag7:*", + }, + Namespace: "namespace2", + }, + }, + expected: nil, + }, + { + name: "multiple input tags, composite tag value, match", + inputTags: map[string]string{ + "tag3": "apple", + "tag4": "banana", + }, + rules: []Rule{ + { + TagFilters: []string{ + "tag3:{value3,apple} tag4:{value4,banana}", + "tag5:value5 tag6:value6", + }, + Namespace: "namespace1", + }, + }, + expected: []string{"namespace1"}, + }, + { + name: "multiple input tags, var tag value, single rule match", + inputTags: map[string]string{ + "tag3": "apple", + "tag4": "banana", + }, + rules: []Rule{ + { + TagFilters: []string{ + "tag3:{{VAR1}} tag4:{{VAR2}}", + "tag5:value5 tag6:value6", + }, + Namespace: "{{VAR1}}_{{VAR2}}", + }, + }, + expected: []string{"apple_banana"}, + }, + { + name: "multiple input tags, var tag value, multi rule match", + inputTags: map[string]string{ + "tag3": "apple", + "tag4": "banana", + "tag5": "train", + "tag6": "car", + }, + rules: []Rule{ + { + TagFilters: []string{ + "tag3:{{VAR1}} tag4:{{VAR2}}", + "tag5:value5 tag6:value6", + }, + Namespace: "namespace1_{{VAR1}}_{{VAR2}}", + }, + { + TagFilters: []string{ + "tag4:{{VAR5}} tag5:{{VAR6}} tag6:{{VAR2}}", + "tag5:value5 tag7:value7", + }, + Namespace: "{{VAR6}}_{{VAR6}}_{{VAR2}}_{{VAR5}}", + }, + }, + expected: []string{ + "namespace1_apple_banana", + "train_train_car_banana", + }, + }, + { + name: "multiple input tags, wildcard tag value, multi rule match", + inputTags: map[string]string{ + "tag3": "apple", + "tag4": "banana", + "tag5": "train", + "tag6": "car", + }, + rules: []Rule{ + { + TagFilters: []string{ + "tag3:app* tag4:ban*", + "tag5:train tag6:value6", + }, + Namespace: "namespace1", + }, + { + TagFilters: []string{ + "tag4:bag* tag5:* tag6:c*", + "tag5:value5 tag7:value7", + }, + Namespace: "namespace2", + }, + }, + expected: []string{"namespace1"}, + }, + } + + less := func(a, b string) bool { return a < b } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + tree := New[*Rule]() + for _, rule := range tt.rules { + for _, tagFilter := range rule.TagFilters { + err := tree.AddTagFilter(tagFilter, &rule) + require.NoError(t, err) + } + } + + // check all match + actual := make([]*Rule, 0, len(tt.expected)) + _, err := tree.Match(tt.inputTags, &actual) + resolved := make([]string, 0, len(actual)) + for _, r := range actual { + ns, err := r.Resolve(tt.inputTags) + require.NoError(t, err) + resolved = append(resolved, ns) + } + + require.NoError(t, err) + if len(tt.expected) == 0 { + require.Empty(t, actual) + return + } + + actualNamespaces := uniqueNamespaces(resolved) + require.Equal(t, "", cmp.Diff(tt.expected, actualNamespaces, cmpopts.SortSlices(less))) + + // check any match + anyMatched, err := tree.Match(tt.inputTags, nil) + require.Equal(t, len(tt.expected) > 0, anyMatched) + }) + } +} + +func uniqueNamespaces(input []string) []string { + unique := make(map[string]struct{}) + for _, r := range input { + unique[r] = struct{}{} + } + + output := make([]string, 0, len(unique)) + for ns, _ := range unique { + output = append(output, ns) + } + return output +} diff --git a/src/metrics/tagfiltertree/trie.go b/src/metrics/tagfiltertree/trie.go new file mode 100644 index 0000000000..4ddcb63bac --- /dev/null +++ b/src/metrics/tagfiltertree/trie.go @@ -0,0 +1,347 @@ +package tagfiltertree + +import ( + "fmt" + "strings" +) + +type TrieNode[T any] struct { + ch trieChar + children trieChildren[TrieNode[T]] + data []T +} + +func NewEmptyNode[T any](ch byte) *TrieNode[T] { + return &TrieNode[T]{ + ch: NewTrieChar(ch), + children: newTrieChildren[TrieNode[T]](), + data: nil, + } +} + +type Trie[T any] struct { + root *TrieNode[T] +} + +func NewTrie[T any]() *Trie[T] { + return &Trie[T]{ + root: nil, + } +} + +func (tr *Trie[T]) Insert(pattern string, data *T) error { + if err := ValidatePattern(pattern); err != nil { + return err + } + + if tr.root == nil { + tr.root = NewEmptyNode[T]('^') + } + + return insertHelper(tr.root, pattern, 0, len(pattern), data) +} + +func ValidatePattern(pattern string) error { + if len(pattern) == 0 { + return fmt.Errorf("empty pattern") + } + + if strings.Contains(pattern, "**") { + return fmt.Errorf("invalid pattern") + } + + if strings.Contains(pattern, "{") != strings.Contains(pattern, "}") { + return fmt.Errorf("invalid pattern") + } + + // pattern cannot have more than one pair of '{' and '}'. + if strings.Count(pattern, "{") > 1 || strings.Count(pattern, "}") > 1 { + return fmt.Errorf("multiple composite patterns not supported") + } + + openIdx := strings.Index(pattern, "{") + if openIdx != -1 { + closeIdx := strings.Index(pattern, "}") + if closeIdx != -1 { + // we do not support negation within composite patterns. + if strings.Contains(pattern[openIdx:closeIdx], "!") { + return fmt.Errorf("negation not supported in composite patterns") + } + } + } + + if strings.Contains(pattern, "[") != strings.Contains(pattern, "]") { + return fmt.Errorf("invalid pattern") + } + + // at the moment we only support single char ranges. + if strings.Count(pattern, "[") > 1 || strings.Count(pattern, "]") > 1 { + return fmt.Errorf("multiple char range patterns not supported") + } + + // we do not support certain special chars in the pattern. + if strings.ContainsAny(pattern, "?") { + return fmt.Errorf("invalid special chars in pattern") + } + + for _, ch := range pattern { + if ch > 127 { + return fmt.Errorf("invalid character in pattern %c", ch) + } + } + + return nil +} + +func insertHelper[T any]( + root *TrieNode[T], + pattern string, + startIdx int, + endIdx int, + data *T, +) error { + if root == nil { + return nil + } + + if startIdx == endIdx { + return nil + } + + var ( + err error + options []string + ) + + if pattern[startIdx] == '{' { + options, err = ParseCompositePattern(pattern[startIdx:]) + if err != nil { + return err + } + } + + if pattern[startIdx] == '[' { + options, err = ParseCharRange(pattern[startIdx:]) + if err != nil { + return err + } + } + + for _, option := range options { + if err := insertHelper( + root, + option, + 0, + len(option), + data, + ); err != nil { + return err + } + } + + if len(options) > 0 { + // matched composite or char range pattern. + return nil + } + + var node *TrieNode[T] + if root.children.Exists(pattern[startIdx]) { + node = root.children.Get(pattern[startIdx]) + } + if node == nil { + node = NewEmptyNode[T](pattern[startIdx]) + } + + if node.ch.Char() == '!' { + if data != nil { + node.data = append(node.data, *data) + } + } + + root.children.Insert(pattern[startIdx], node) + + if startIdx == len(pattern)-1 { + // found the leaf node. + node.ch.SetEnd() + if data != nil { + node.data = append(node.data, *data) + } + } + + return insertHelper(node, pattern, startIdx+1, endIdx, data) +} + +func (tr *Trie[T]) Match(input string, data *[]T) (bool, error) { + return matchHelper(tr.root, input, 0, data) +} + +func matchHelper[T any]( + root *TrieNode[T], + input string, + startIdx int, + data *[]T, +) (bool, error) { + if root == nil { + return false, nil + } + + if startIdx == len(input) { + // looks for patterns that end with a '*'. + var child *TrieNode[T] + if root.children.Exists('*') { + child = root.children.Get('*') + } + if child != nil && child.ch.IsEnd() { + if data != nil { + *data = append(*data, child.data...) + } + return true, nil + } + + return false, nil + } + + matched := false + // check matchAll case. + var child *TrieNode[T] + if root.children.Exists('*') { + child = root.children.Get('*') + } + if child != nil { + var err error + + // move forward in the pattern and input. + if startIdx < len(input) { + var subChild *TrieNode[T] + if child.children.Exists(input[startIdx]) { + subChild = child.children.Get(input[startIdx]) + } + if subChild != nil { + matchedOne, err := matchHelper(subChild, input, startIdx+1, data) + if err != nil { + return false, err + } + matched = matched || matchedOne + } + } + + // stay on the '*'. + matchedAll, err := matchHelper(root, input, startIdx+1, data) + if err != nil { + return false, err + } + + matched = matched || matchedAll + } + + child = nil + if root.children.Exists('!') { + child = root.children.Get('!') + } + if child != nil { + matchedNegate, err := matchHelper(child, input, startIdx, nil) + if err != nil { + return false, err + } + + matched = matched || !matchedNegate + if !matchedNegate { + if data != nil { + *data = append(*data, child.data...) + } + } + } + + var subChild *TrieNode[T] + if root.children.Exists(input[startIdx]) { + subChild = root.children.Get(input[startIdx]) + } + if subChild != nil { + matchedOne, err := matchHelper(subChild, input, startIdx+1, data) + if err != nil { + return false, err + } + matched = matched || matchedOne + + if startIdx == len(input)-1 { + if subChild.ch.IsEnd() { + if data != nil { + *data = append(*data, subChild.data...) + } + matched = true + } + } + } + + return matched, nil +} + +// ParseCompositePattern extracts the options from a composite pattern. +// It appends the suffix (the segment after the '}' character) to each option. +// It does not check for other invalid chars in the pattern. Those checks should +// be done before calling this function. +func ParseCompositePattern(pattern string) ([]string, error) { + openIdx := strings.Index(pattern, "{") + closeIdx := strings.Index(pattern, "}") + + if openIdx == -1 || closeIdx == -1 { + return nil, fmt.Errorf("invalid pattern") + } + + if closeIdx != strings.LastIndex(pattern, "}") { + return nil, fmt.Errorf("invalid pattern") + } + + optionStr := pattern[openIdx+1 : closeIdx] + optionSuffix := "" + if closeIdx < len(pattern)-1 { + optionSuffix = pattern[closeIdx+1:] + } + optionPrefix := pattern[:openIdx] + + options := strings.Split(optionStr, ",") + for i := range options { + option := strings.TrimSpace(options[i]) + options[i] = optionPrefix + option + optionSuffix + } + + return options, nil +} + +// ParseCharRange extracts the options from a char range pattern. +// It does not check for other invalid chars in the pattern. Those checks should +// be done before calling this function. +func ParseCharRange(pattern string) ([]string, error) { + openIdx := strings.Index(pattern, "[") + closeIdx := strings.Index(pattern, "]") + + if openIdx == -1 || closeIdx == -1 { + return nil, fmt.Errorf("invalid pattern") + } + + if closeIdx != strings.LastIndex(pattern, "]") { + return nil, fmt.Errorf("invalid pattern") + } + + optionStr := pattern[openIdx+1 : closeIdx] + optionSuffix := "" + if closeIdx < len(pattern)-1 { + optionSuffix = pattern[closeIdx+1:] + } + optionPrefix := pattern[:openIdx] + + options := make([]string, 0) + for i := 0; i < len(optionStr); i++ { + if i+2 < len(optionStr) && optionStr[i+1] == '-' { + for ch := optionStr[i]; ch <= optionStr[i+2]; ch++ { + options = append(options, optionPrefix+string(ch)+optionSuffix) + } + i += 2 + continue + } + + options = append(options, optionPrefix+string(optionStr[i])+optionSuffix) + } + + return options, nil +} diff --git a/src/metrics/tagfiltertree/trie_char.go b/src/metrics/tagfiltertree/trie_char.go new file mode 100644 index 0000000000..248fe17485 --- /dev/null +++ b/src/metrics/tagfiltertree/trie_char.go @@ -0,0 +1,23 @@ +package tagfiltertree + +type trieChar struct { + ch byte +} + +func (c *trieChar) SetEnd() { + c.ch |= 0x01 +} + +func (c *trieChar) IsEnd() bool { + return c.ch&0x01 == 0x01 +} + +func NewTrieChar(ch byte) trieChar { + return trieChar{ + ch: ch << 1, + } +} + +func (c *trieChar) Char() byte { + return c.ch >> 1 +} diff --git a/src/metrics/tagfiltertree/trie_children.go b/src/metrics/tagfiltertree/trie_children.go new file mode 100644 index 0000000000..7d7780f0a2 --- /dev/null +++ b/src/metrics/tagfiltertree/trie_children.go @@ -0,0 +1,50 @@ +package tagfiltertree + +import "fmt" + +type trieChildren[T any] struct { + children []*T + pointerSet PointerSet +} + +func newTrieChildren[T any]() trieChildren[T] { + return trieChildren[T]{ + children: nil, + } +} + +func (tc *trieChildren[T]) Insert(ch byte, data *T) error { + if tc.pointerSet.IsSet(ch) { + return fmt.Errorf("character already exists") + } + + newIdx := tc.pointerSet.CountSetBitsUntil(ch) + + // make room for the new child. + tc.children = append(tc.children, nil) + + for i := len(tc.children) - 1; i > newIdx; i-- { + tc.children[i] = tc.children[i-1] + } + + // set the new data in the correct idx. + tc.children[newIdx] = data + + // set the idx of the new child. + tc.pointerSet.Set(ch) + + return nil +} + +func (tc *trieChildren[T]) Get(ch byte) *T { + if !tc.pointerSet.IsSet(ch) { + return nil + } + + childIdx := tc.pointerSet.CountSetBitsUntil(ch) - 1 + return tc.children[childIdx] +} + +func (tc *trieChildren[T]) Exists(ch byte) bool { + return tc.pointerSet.IsSet(ch) +} diff --git a/src/metrics/tagfiltertree/trie_children_test.go b/src/metrics/tagfiltertree/trie_children_test.go new file mode 100644 index 0000000000..ec9823fb86 --- /dev/null +++ b/src/metrics/tagfiltertree/trie_children_test.go @@ -0,0 +1,31 @@ +package tagfiltertree + +import ( + "testing" +) + +func TestTrieChildrenGet(t *testing.T) { + tc := newTrieChildren[int]() + + // Insert a child. + data := 42 + err := tc.Insert('a', &data) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + + // Get the child. + got := tc.Get('a') + if got == nil { + t.Fatalf("expected child to exist") + } + if *got != data { + t.Fatalf("unexpected data: got %v, want %v", *got, data) + } + + // Get a non-existing child. + got = tc.Get('b') + if got != nil { + t.Fatalf("unexpected child: got %v, want nil", *got) + } +} diff --git a/src/metrics/tagfiltertree/trie_test.go b/src/metrics/tagfiltertree/trie_test.go new file mode 100644 index 0000000000..ebec1a3b3d --- /dev/null +++ b/src/metrics/tagfiltertree/trie_test.go @@ -0,0 +1,416 @@ +package tagfiltertree + +import ( + "reflect" + "sort" + "testing" + + "github.com/stretchr/testify/require" +) + +type Pattern struct { + pattern string + data string +} + +func TestTrieMatch(t *testing.T) { + tests := []struct { + name string + patterns []Pattern + input string + expectedMatch bool + expectedData []string + isDisabled bool + }{ + { + name: "suffix wildcard minimum match", + patterns: []Pattern{ + { + pattern: "foo*", + data: "data1", + }, + }, + input: "foo", + expectedMatch: true, + expectedData: []string{"data1"}, + }, + { + name: "suffix wildcard match", + patterns: []Pattern{ + { + pattern: "foo*", + data: "data1", + }, + }, + input: "foo123", + expectedMatch: true, + expectedData: []string{"data1"}, + }, + { + name: "suffix wildcard suffix match", + patterns: []Pattern{ + { + pattern: "foo*", + data: "data1", + }, + }, + input: "foofoofoofoo", + expectedMatch: true, + expectedData: []string{"data1"}, + }, + { + name: "suffix wildcard no match", + patterns: []Pattern{ + { + pattern: "foo*", + data: "data1", + }, + }, + input: "fobar", + expectedMatch: false, + expectedData: []string{}, + }, + { + name: "prefix wildcard minimum match", + patterns: []Pattern{ + { + pattern: "*foo", + data: "data1", + }, + }, + input: "foo", + expectedMatch: true, + expectedData: []string{"data1"}, + }, + { + name: "prefix wildcard prefix match", + patterns: []Pattern{ + { + pattern: "*foo", + data: "data1", + }, + }, + input: "fofofofoo", + expectedMatch: true, + expectedData: []string{"data1"}, + }, + { + name: "prefix wildcard no match", + patterns: []Pattern{ + { + pattern: "*foo", + data: "data1", + }, + }, + input: "barfobo", + expectedMatch: false, + expectedData: []string{}, + }, + { + name: "single exact match", + patterns: []Pattern{ + { + pattern: "foobar", + data: "data1", + }, + }, + input: "foobar", + expectedMatch: true, + expectedData: []string{"data1"}, + }, + { + name: "single suffix and prefix wildcard match", + patterns: []Pattern{ + { + pattern: "*foo*", + data: "data1", + }, + }, + input: "barfoobar", + expectedMatch: true, + expectedData: []string{"data1"}, + }, + { + name: "single negation match", + patterns: []Pattern{ + { + pattern: "!foo", + data: "data1", + }, + }, + input: "bar", + expectedMatch: true, + expectedData: []string{"data1"}, + }, + { + name: "single negation with wildcard match", + patterns: []Pattern{ + { + pattern: "!*foo*", + data: "data1", + }, + }, + input: "fofobar", + expectedMatch: true, + expectedData: []string{"data1"}, + }, + { + name: "composite, absolute match, single match", + patterns: []Pattern{ + { + pattern: "{foo,bar}", + data: "data1", + }, + }, + input: "bar", + expectedMatch: true, + expectedData: []string{"data1"}, + }, + { + name: "composite starting middle, wildcards, single match", + patterns: []Pattern{ + { + pattern: "*choco{*foo*,*bar*}late*", + data: "data1", + }, + }, + input: "123chocobarlate456", + expectedMatch: true, + expectedData: []string{"data1"}, + }, + { + name: "char range, wildcards, single match", + patterns: []Pattern{ + { + pattern: "[a-d]*", + data: "data1", + }, + }, + input: "chocolate", + expectedMatch: true, + expectedData: []string{"data1"}, + }, + { + name: "composite, wildcard match, single match", + patterns: []Pattern{ + { + pattern: "{*foo*,*bar*}", + data: "data1", + }, + }, + input: "bar", + expectedMatch: true, + expectedData: []string{"data1"}, + }, + { + name: "single negative composite, wildcards, no match", + patterns: []Pattern{ + { + pattern: "!{*foo*,*bar*}", + data: "data1", + }, + }, + input: "bar", + expectedMatch: false, + expectedData: []string{}, + }, + { + name: "composite, char range, wildcards, single match", + patterns: []Pattern{ + { + pattern: "{abc, [1-4]*, *foo*}", + data: "data1", + }, + }, + input: "30000000", + expectedMatch: true, + expectedData: []string{"data1"}, + }, + { + name: "char range without dash, wildcards, no match", + patterns: []Pattern{ + { + pattern: "[14]*", + data: "data1", + }, + }, + input: "30000000", + expectedMatch: false, + expectedData: []string{}, + }, + { + name: "char range without dash, wildcards, match", + patterns: []Pattern{ + { + pattern: "[14]*", + data: "data1", + }, + }, + input: "40000000", + expectedMatch: true, + expectedData: []string{"data1"}, + }, + { + name: "multiple composite, wildcard and absolute match, single match", + patterns: []Pattern{ + { + pattern: "{*foo*,*bal*,batbaz}", + data: "data1", + }, + { + pattern: "*batbaz", + data: "data2", + }, + }, + input: "batbaz", + expectedMatch: true, + expectedData: []string{"data1", "data2"}, + }, + { + name: "multiple intersecting composite, wildcard and absolute match, all match", + patterns: []Pattern{ + { + pattern: "!{*foo*,*bal*}", + data: "data1", + }, + { + pattern: "!{*bal*,gaz*}", + data: "data2", + }, + }, + input: "batbaz", + expectedMatch: true, + expectedData: []string{"data1", "data2"}, + // disable test since we don't support multiple negative composite filters at the moment. + isDisabled: true, + }, + { + name: "multiple intersecting composite, wildcard and absolute match, single match", + patterns: []Pattern{ + { + pattern: "!{*foo*,*bal*}", + data: "data1", + }, + { + pattern: "!{*bal*,gaz*}", + data: "data2", + }, + }, + input: "foo", + expectedMatch: true, + expectedData: []string{"data2"}, + // disable test since we don't support multiple negative composite filters at the moment. + isDisabled: true, + }, + { + name: "multiple intersecting composite, wildcard and absolute match, no match", + patterns: []Pattern{ + { + pattern: "!{*foo*,*bal*}", + data: "data1", + }, + { + pattern: "!{*bal*,gaz*}", + data: "data2", + }, + }, + input: "banbalabal", + expectedMatch: false, + expectedData: []string{}, + // disable test since we don't support multiple negative composite filters at the moment. + isDisabled: true, + }, + { + name: "multiple negations with wildcard, single match", + patterns: []Pattern{ + { + pattern: "!*foo*", + data: "data1", + }, + { + pattern: "!*bar*", + data: "data2", + }, + { + pattern: "!*baz*", + data: "data3", + }, + }, + input: "fofoobar", + expectedMatch: true, + expectedData: []string{"data3"}, + // disable test since we don't support multiple negative composite filters at the moment. + isDisabled: true, + }, + } + + for _, tt := range tests { + runTest := func(withData bool) { + if tt.isDisabled { + return + } + trie := NewTrie[string]() + for _, rule := range tt.patterns { + var data *string + if withData { + data = &rule.data + } + trie.Insert(rule.pattern, data) + } + + var data []string + if withData { + data = make([]string, 0) + } + matched, err := trie.Match(tt.input, &data) + require.NoError(t, err) + + // check match. + require.Equal(t, tt.expectedMatch, matched) + + if !withData { + for i := range data { + require.Empty(t, data[i]) + } + return + } + + if len(tt.expectedData) == 0 { + require.Empty(t, data) + return + } + + // dedup data. + data = deduplicateData(data) + + // check data. + sort.Slice(data, func(i, j int) bool { + return data[i] < data[j] + }) + sort.Slice(tt.expectedData, func(i, j int) bool { + return tt.expectedData[i] < tt.expectedData[j] + }) + + require.Equal(t, true, reflect.DeepEqual(data, tt.expectedData)) + } + + t.Run(tt.name+"_with_data", func(t *testing.T) { + runTest(true) + }) + t.Run(tt.name+"_without_data", func(t *testing.T) { + runTest(false) + }) + } +} + +func deduplicateData(data []string) []string { + dataMap := make(map[string]struct{}) + for i := range data { + dataMap[data[i]] = struct{}{} + } + data = make([]string, 0, len(dataMap)) + for k := range dataMap { + data = append(data, k) + } + return data +} From 4c96204121d097348493b1b4cea3d1e342919bf6 Mon Sep 17 00:00:00 2001 From: Shankar Nair Date: Tue, 29 Oct 2024 12:52:48 -0700 Subject: [PATCH 2/8] lint fixes --- src/metrics/tagfiltertree/options.go | 2 ++ src/metrics/tagfiltertree/pointer_set.go | 14 +++++++------ src/metrics/tagfiltertree/tag_filter_tree.go | 21 ++++++------------- .../tag_filter_tree_benchmark_test.go | 6 +++--- .../tagfiltertree/tag_filter_tree_test.go | 1 + src/metrics/tagfiltertree/trie.go | 11 +++++++++- src/metrics/tagfiltertree/trie_char.go | 4 ++++ src/metrics/tagfiltertree/trie_children.go | 7 ++++--- src/metrics/tagfiltertree/trie_test.go | 4 ++-- 9 files changed, 40 insertions(+), 30 deletions(-) diff --git a/src/metrics/tagfiltertree/options.go b/src/metrics/tagfiltertree/options.go index 43d048c140..08be319591 100644 --- a/src/metrics/tagfiltertree/options.go +++ b/src/metrics/tagfiltertree/options.go @@ -17,10 +17,12 @@ func NewOptions() Options { return &options{} } +// TagFilterOptions returns the tag filter options. func (o *options) TagFilterOptions() filters.TagsFilterOptions { return o.tagFilterOptions } +// SetTagFilterOptions sets the tag filter options. func (o *options) SetTagFilterOptions(tf filters.TagsFilterOptions) Options { opts := *o opts.tagFilterOptions = tf diff --git a/src/metrics/tagfiltertree/pointer_set.go b/src/metrics/tagfiltertree/pointer_set.go index df178bd0bf..fbf0c3f3ad 100644 --- a/src/metrics/tagfiltertree/pointer_set.go +++ b/src/metrics/tagfiltertree/pointer_set.go @@ -2,6 +2,8 @@ package tagfiltertree import "math/bits" +// PointerSet is a set of pointers backed by a bitmap to +// represent a sparse set of at most 127 pointers. type PointerSet struct { bits [2]uint64 // Using 2 uint64 gives us 128 bits (0 to 127). } @@ -28,11 +30,11 @@ func (ps *PointerSet) CountSetBitsUntil(i byte) int { if i < 64 { // Count bits in the first uint64 up to index i. return bits.OnesCount64(ps.bits[0] & ((1 << (i + 1)) - 1)) - } else { - // Count all bits in the first uint64. - count := bits.OnesCount64(ps.bits[0]) - // Count bits in the second uint64 up to index i - 64. - count += bits.OnesCount64(ps.bits[1] & ((1 << (i - 64 + 1)) - 1)) - return count } + + // Count all bits in the first uint64. + count := bits.OnesCount64(ps.bits[0]) + // Count bits in the second uint64 up to index i - 64. + count += bits.OnesCount64(ps.bits[1] & ((1 << (i - 64 + 1)) - 1)) + return count } diff --git a/src/metrics/tagfiltertree/tag_filter_tree.go b/src/metrics/tagfiltertree/tag_filter_tree.go index ce22d69d12..227eb45241 100644 --- a/src/metrics/tagfiltertree/tag_filter_tree.go +++ b/src/metrics/tagfiltertree/tag_filter_tree.go @@ -10,6 +10,7 @@ const ( _matchNone = "!*" ) +// Tag represents a tag with name, value and variable. type Tag struct { Name string Val string @@ -28,6 +29,7 @@ type Tree[T any] struct { Nodes []*node[T] } +// NodeValue represents a value in a node of the Tree. type NodeValue[T any] struct { Val string PatternTrie *Trie[T] @@ -107,6 +109,7 @@ func (n *node[T]) addValue(filter string, data *T) (*Tree[T], error) { return newNodeValue.Tree, nil } +// NewNodeValue creates a new NodeValue. func NewNodeValue[T any](val string, patternTrie *Trie[T], data *T) NodeValue[T] { t := &Tree[T]{ Nodes: make([]*node[T], 0), @@ -282,6 +285,7 @@ func TagsFromTagFilter(tf string) ([]Tag, error) { return tags, nil } +// IsVarTagValue returns true if the value is a variable tag value. func IsVarTagValue(value string) bool { if len(value) < 4 { return false @@ -300,6 +304,7 @@ func IsVarTagValue(value string) bool { return false } +// IsMatchNoneTag returns true if the tag is a match none tag. func IsMatchNoneTag(tagName string) bool { if len(tagName) == 0 { return false @@ -307,21 +312,7 @@ func IsMatchNoneTag(tagName string) bool { return tagName[0] == '!' } +// IsAbsoluteValue returns true if the value is an absolute value. func IsAbsoluteValue(val string) bool { return !strings.ContainsAny(val, "{!*[") } - -/* -tag1:value1 tag2:value2 tag3:value3 -tag1:foo1 tag4:* tag8:val* - -tag1 -> value1 ----> - tag2 -> value2 ----> - tag3 -> value3 D-> R1 - foo1 ----> - tag4 -> * ----> - tag8 -> val* D-> R2 - -input: - tag1:foo1 tag4:foobar tag8:value8 -*/ diff --git a/src/metrics/tagfiltertree/tag_filter_tree_benchmark_test.go b/src/metrics/tagfiltertree/tag_filter_tree_benchmark_test.go index d8205c9d91..06a60a8761 100644 --- a/src/metrics/tagfiltertree/tag_filter_tree_benchmark_test.go +++ b/src/metrics/tagfiltertree/tag_filter_tree_benchmark_test.go @@ -83,10 +83,10 @@ func generateTagValuePair(idx int, tagType int) string { case NEGATION: if rnd.Intn(2) == 0 { return fmt.Sprintf("%s:!*", tag) // tag should not exist - } else { - value := values[rnd.Intn(len(values))] - return fmt.Sprintf("%s:!%s", tag, value) // tag should not have this value } + + value := values[rnd.Intn(len(values))] + return fmt.Sprintf("%s:!%s", tag, value) // tag should not have this value case COMPOSITE: value1 := values[rnd.Intn(len(values))] value2 := values[rnd.Intn(len(values))] diff --git a/src/metrics/tagfiltertree/tag_filter_tree_test.go b/src/metrics/tagfiltertree/tag_filter_tree_test.go index 75dd608c5a..e415e59584 100644 --- a/src/metrics/tagfiltertree/tag_filter_tree_test.go +++ b/src/metrics/tagfiltertree/tag_filter_tree_test.go @@ -511,6 +511,7 @@ func TestTreeGetData(t *testing.T) { // check any match anyMatched, err := tree.Match(tt.inputTags, nil) + require.NoError(t, err) require.Equal(t, len(tt.expected) > 0, anyMatched) }) } diff --git a/src/metrics/tagfiltertree/trie.go b/src/metrics/tagfiltertree/trie.go index 4ddcb63bac..9bc394de7b 100644 --- a/src/metrics/tagfiltertree/trie.go +++ b/src/metrics/tagfiltertree/trie.go @@ -5,12 +5,14 @@ import ( "strings" ) +// TrieNode represents a node in the trie. type TrieNode[T any] struct { ch trieChar children trieChildren[TrieNode[T]] data []T } +// NewEmptyNode creates a new empty node with the given character. func NewEmptyNode[T any](ch byte) *TrieNode[T] { return &TrieNode[T]{ ch: NewTrieChar(ch), @@ -19,16 +21,19 @@ func NewEmptyNode[T any](ch byte) *TrieNode[T] { } } +// Trie represents a trie data structure. type Trie[T any] struct { root *TrieNode[T] } +// NewTrie creates a new trie. func NewTrie[T any]() *Trie[T] { return &Trie[T]{ root: nil, } } +// Insert inserts a new pattern and data into the trie. func (tr *Trie[T]) Insert(pattern string, data *T) error { if err := ValidatePattern(pattern); err != nil { return err @@ -41,6 +46,7 @@ func (tr *Trie[T]) Insert(pattern string, data *T) error { return insertHelper(tr.root, pattern, 0, len(pattern), data) } +// ValidatePattern validates the given pattern. func ValidatePattern(pattern string) error { if len(pattern) == 0 { return fmt.Errorf("empty pattern") @@ -158,7 +164,9 @@ func insertHelper[T any]( } } - root.children.Insert(pattern[startIdx], node) + if err := root.children.Insert(pattern[startIdx], node); err != nil { + return err + } if startIdx == len(pattern)-1 { // found the leaf node. @@ -171,6 +179,7 @@ func insertHelper[T any]( return insertHelper(node, pattern, startIdx+1, endIdx, data) } +// Match matches the given input against the trie. func (tr *Trie[T]) Match(input string, data *[]T) (bool, error) { return matchHelper(tr.root, input, 0, data) } diff --git a/src/metrics/tagfiltertree/trie_char.go b/src/metrics/tagfiltertree/trie_char.go index 248fe17485..34476faa35 100644 --- a/src/metrics/tagfiltertree/trie_char.go +++ b/src/metrics/tagfiltertree/trie_char.go @@ -4,20 +4,24 @@ type trieChar struct { ch byte } +// SetEnd sets the end flag of the character. func (c *trieChar) SetEnd() { c.ch |= 0x01 } +// IsEnd returns true if the character is the end of a pattern. func (c *trieChar) IsEnd() bool { return c.ch&0x01 == 0x01 } +// NewTrieChar creates a new trie character. func NewTrieChar(ch byte) trieChar { return trieChar{ ch: ch << 1, } } +// Char returns the character. func (c *trieChar) Char() byte { return c.ch >> 1 } diff --git a/src/metrics/tagfiltertree/trie_children.go b/src/metrics/tagfiltertree/trie_children.go index 7d7780f0a2..b4fc43e549 100644 --- a/src/metrics/tagfiltertree/trie_children.go +++ b/src/metrics/tagfiltertree/trie_children.go @@ -1,7 +1,5 @@ package tagfiltertree -import "fmt" - type trieChildren[T any] struct { children []*T pointerSet PointerSet @@ -13,9 +11,11 @@ func newTrieChildren[T any]() trieChildren[T] { } } +// Insert inserts a new child with the given byte and data. func (tc *trieChildren[T]) Insert(ch byte, data *T) error { if tc.pointerSet.IsSet(ch) { - return fmt.Errorf("character already exists") + // already exists. + return nil } newIdx := tc.pointerSet.CountSetBitsUntil(ch) @@ -45,6 +45,7 @@ func (tc *trieChildren[T]) Get(ch byte) *T { return tc.children[childIdx] } +// Exists returns true if the child with the given byte exists. func (tc *trieChildren[T]) Exists(ch byte) bool { return tc.pointerSet.IsSet(ch) } diff --git a/src/metrics/tagfiltertree/trie_test.go b/src/metrics/tagfiltertree/trie_test.go index ebec1a3b3d..6ea6283cab 100644 --- a/src/metrics/tagfiltertree/trie_test.go +++ b/src/metrics/tagfiltertree/trie_test.go @@ -15,11 +15,11 @@ type Pattern struct { func TestTrieMatch(t *testing.T) { tests := []struct { - name string patterns []Pattern + expectedData []string + name string input string expectedMatch bool - expectedData []string isDisabled bool }{ { From bde1d752c0219b537085d64e65675cb54271547b Mon Sep 17 00:00:00 2001 From: Shankar Nair Date: Tue, 29 Oct 2024 13:30:56 -0700 Subject: [PATCH 3/8] lint fixes --- src/metrics/tagfiltertree/tag_filter_tree_test.go | 2 +- src/metrics/tagfiltertree/trie.go | 2 +- src/metrics/tagfiltertree/trie_char.go | 4 ++-- src/metrics/tagfiltertree/trie_test.go | 4 +++- 4 files changed, 7 insertions(+), 5 deletions(-) diff --git a/src/metrics/tagfiltertree/tag_filter_tree_test.go b/src/metrics/tagfiltertree/tag_filter_tree_test.go index e415e59584..fd1054f061 100644 --- a/src/metrics/tagfiltertree/tag_filter_tree_test.go +++ b/src/metrics/tagfiltertree/tag_filter_tree_test.go @@ -524,7 +524,7 @@ func uniqueNamespaces(input []string) []string { } output := make([]string, 0, len(unique)) - for ns, _ := range unique { + for ns := range unique { output = append(output, ns) } return output diff --git a/src/metrics/tagfiltertree/trie.go b/src/metrics/tagfiltertree/trie.go index 9bc394de7b..cdd8f5e895 100644 --- a/src/metrics/tagfiltertree/trie.go +++ b/src/metrics/tagfiltertree/trie.go @@ -15,7 +15,7 @@ type TrieNode[T any] struct { // NewEmptyNode creates a new empty node with the given character. func NewEmptyNode[T any](ch byte) *TrieNode[T] { return &TrieNode[T]{ - ch: NewTrieChar(ch), + ch: newTrieChar(ch), children: newTrieChildren[TrieNode[T]](), data: nil, } diff --git a/src/metrics/tagfiltertree/trie_char.go b/src/metrics/tagfiltertree/trie_char.go index 34476faa35..dfbcf7c747 100644 --- a/src/metrics/tagfiltertree/trie_char.go +++ b/src/metrics/tagfiltertree/trie_char.go @@ -14,8 +14,8 @@ func (c *trieChar) IsEnd() bool { return c.ch&0x01 == 0x01 } -// NewTrieChar creates a new trie character. -func NewTrieChar(ch byte) trieChar { +// newTrieChar creates a new trie character. +func newTrieChar(ch byte) trieChar { return trieChar{ ch: ch << 1, } diff --git a/src/metrics/tagfiltertree/trie_test.go b/src/metrics/tagfiltertree/trie_test.go index 6ea6283cab..763e09e710 100644 --- a/src/metrics/tagfiltertree/trie_test.go +++ b/src/metrics/tagfiltertree/trie_test.go @@ -355,7 +355,9 @@ func TestTrieMatch(t *testing.T) { if withData { data = &rule.data } - trie.Insert(rule.pattern, data) + + err := trie.Insert(rule.pattern, data) + require.NoError(t, err) } var data []string From ee6c313f51074d82e094847255bd1d6d31828c19 Mon Sep 17 00:00:00 2001 From: Shankar Nair Date: Tue, 29 Oct 2024 17:35:30 -0700 Subject: [PATCH 4/8] lint fixes --- src/metrics/tagfiltertree/tag_filter_tree.go | 47 +++++----- src/metrics/tagfiltertree/trie.go | 94 ++++++++++---------- src/metrics/tagfiltertree/trie_children.go | 28 +++--- 3 files changed, 84 insertions(+), 85 deletions(-) diff --git a/src/metrics/tagfiltertree/tag_filter_tree.go b/src/metrics/tagfiltertree/tag_filter_tree.go index 227eb45241..c57c4e6751 100644 --- a/src/metrics/tagfiltertree/tag_filter_tree.go +++ b/src/metrics/tagfiltertree/tag_filter_tree.go @@ -29,20 +29,6 @@ type Tree[T any] struct { Nodes []*node[T] } -// NodeValue represents a value in a node of the Tree. -type NodeValue[T any] struct { - Val string - PatternTrie *Trie[T] - Tree *Tree[T] - Data []T -} - -type node[T any] struct { - Name string - AbsoluteValues map[string]NodeValue[T] - PatternValues []NodeValue[T] -} - // New creates a new tree. func New[T any]() *Tree[T] { return &Tree[T]{ @@ -59,6 +45,29 @@ func (t *Tree[T]) AddTagFilter(tagFilter string, data T) error { return addNode(t, tags, 0, data) } +// Match returns the data for the given tags. +func (t *Tree[T]) Match(tags map[string]string, data *[]T) (bool, error) { + isMatchAny := false + if data == nil || *data == nil { + isMatchAny = true + } + return match(t, tags, data, isMatchAny) +} + +// NodeValue represents a value in a node of the Tree. +type NodeValue[T any] struct { + Val string + PatternTrie *Trie[T] + Tree *Tree[T] + Data []T +} + +type node[T any] struct { + Name string + AbsoluteValues map[string]NodeValue[T] + PatternValues []NodeValue[T] +} + func (n *node[T]) addValue(filter string, data *T) (*Tree[T], error) { if IsAbsoluteValue(filter) { if n.AbsoluteValues == nil { @@ -173,16 +182,6 @@ func addNode[T any](t *Tree[T], tags []Tag, idx int, data T) error { return nil } -// Match returns the data for the given tags. -func (t *Tree[T]) Match(tags map[string]string, data *[]T) (bool, error) { - isMatchAny := false - if data == nil || *data == nil { - isMatchAny = true - } - return match(t, tags, data, isMatchAny) - -} - func match[T any]( t *Tree[T], tags map[string]string, diff --git a/src/metrics/tagfiltertree/trie.go b/src/metrics/tagfiltertree/trie.go index cdd8f5e895..a1b1cf5de0 100644 --- a/src/metrics/tagfiltertree/trie.go +++ b/src/metrics/tagfiltertree/trie.go @@ -5,31 +5,31 @@ import ( "strings" ) -// TrieNode represents a node in the trie. -type TrieNode[T any] struct { - ch trieChar - children trieChildren[TrieNode[T]] - data []T +// trieNode represents a node in the trie. +type trieNode[T any] struct { + Ch trieChar + Children trieChildren[trieNode[T]] + Data []T } -// NewEmptyNode creates a new empty node with the given character. -func NewEmptyNode[T any](ch byte) *TrieNode[T] { - return &TrieNode[T]{ - ch: newTrieChar(ch), - children: newTrieChildren[TrieNode[T]](), - data: nil, +// newEmptyNode creates a new empty node with the given character. +func newEmptyNode[T any](ch byte) *trieNode[T] { + return &trieNode[T]{ + Ch: newTrieChar(ch), + Children: newTrieChildren[trieNode[T]](), + Data: nil, } } // Trie represents a trie data structure. type Trie[T any] struct { - root *TrieNode[T] + Root *trieNode[T] } // NewTrie creates a new trie. func NewTrie[T any]() *Trie[T] { return &Trie[T]{ - root: nil, + Root: nil, } } @@ -39,11 +39,11 @@ func (tr *Trie[T]) Insert(pattern string, data *T) error { return err } - if tr.root == nil { - tr.root = NewEmptyNode[T]('^') + if tr.Root == nil { + tr.Root = newEmptyNode[T]('^') } - return insertHelper(tr.root, pattern, 0, len(pattern), data) + return insertHelper(tr.Root, pattern, 0, len(pattern), data) } // ValidatePattern validates the given pattern. @@ -100,7 +100,7 @@ func ValidatePattern(pattern string) error { } func insertHelper[T any]( - root *TrieNode[T], + root *trieNode[T], pattern string, startIdx int, endIdx int, @@ -150,29 +150,29 @@ func insertHelper[T any]( return nil } - var node *TrieNode[T] - if root.children.Exists(pattern[startIdx]) { - node = root.children.Get(pattern[startIdx]) + var node *trieNode[T] + if root.Children.Exists(pattern[startIdx]) { + node = root.Children.Get(pattern[startIdx]) } if node == nil { - node = NewEmptyNode[T](pattern[startIdx]) + node = newEmptyNode[T](pattern[startIdx]) } - if node.ch.Char() == '!' { + if node.Ch.Char() == '!' { if data != nil { - node.data = append(node.data, *data) + node.Data = append(node.Data, *data) } } - if err := root.children.Insert(pattern[startIdx], node); err != nil { + if err := root.Children.Insert(pattern[startIdx], node); err != nil { return err } if startIdx == len(pattern)-1 { // found the leaf node. - node.ch.SetEnd() + node.Ch.SetEnd() if data != nil { - node.data = append(node.data, *data) + node.Data = append(node.Data, *data) } } @@ -181,11 +181,11 @@ func insertHelper[T any]( // Match matches the given input against the trie. func (tr *Trie[T]) Match(input string, data *[]T) (bool, error) { - return matchHelper(tr.root, input, 0, data) + return matchHelper(tr.Root, input, 0, data) } func matchHelper[T any]( - root *TrieNode[T], + root *trieNode[T], input string, startIdx int, data *[]T, @@ -196,13 +196,13 @@ func matchHelper[T any]( if startIdx == len(input) { // looks for patterns that end with a '*'. - var child *TrieNode[T] - if root.children.Exists('*') { - child = root.children.Get('*') + var child *trieNode[T] + if root.Children.Exists('*') { + child = root.Children.Get('*') } - if child != nil && child.ch.IsEnd() { + if child != nil && child.Ch.IsEnd() { if data != nil { - *data = append(*data, child.data...) + *data = append(*data, child.Data...) } return true, nil } @@ -212,18 +212,18 @@ func matchHelper[T any]( matched := false // check matchAll case. - var child *TrieNode[T] - if root.children.Exists('*') { - child = root.children.Get('*') + var child *trieNode[T] + if root.Children.Exists('*') { + child = root.Children.Get('*') } if child != nil { var err error // move forward in the pattern and input. if startIdx < len(input) { - var subChild *TrieNode[T] - if child.children.Exists(input[startIdx]) { - subChild = child.children.Get(input[startIdx]) + var subChild *trieNode[T] + if child.Children.Exists(input[startIdx]) { + subChild = child.Children.Get(input[startIdx]) } if subChild != nil { matchedOne, err := matchHelper(subChild, input, startIdx+1, data) @@ -244,8 +244,8 @@ func matchHelper[T any]( } child = nil - if root.children.Exists('!') { - child = root.children.Get('!') + if root.Children.Exists('!') { + child = root.Children.Get('!') } if child != nil { matchedNegate, err := matchHelper(child, input, startIdx, nil) @@ -256,14 +256,14 @@ func matchHelper[T any]( matched = matched || !matchedNegate if !matchedNegate { if data != nil { - *data = append(*data, child.data...) + *data = append(*data, child.Data...) } } } - var subChild *TrieNode[T] - if root.children.Exists(input[startIdx]) { - subChild = root.children.Get(input[startIdx]) + var subChild *trieNode[T] + if root.Children.Exists(input[startIdx]) { + subChild = root.Children.Get(input[startIdx]) } if subChild != nil { matchedOne, err := matchHelper(subChild, input, startIdx+1, data) @@ -273,9 +273,9 @@ func matchHelper[T any]( matched = matched || matchedOne if startIdx == len(input)-1 { - if subChild.ch.IsEnd() { + if subChild.Ch.IsEnd() { if data != nil { - *data = append(*data, subChild.data...) + *data = append(*data, subChild.Data...) } matched = true } diff --git a/src/metrics/tagfiltertree/trie_children.go b/src/metrics/tagfiltertree/trie_children.go index b4fc43e549..1f8fd9cd57 100644 --- a/src/metrics/tagfiltertree/trie_children.go +++ b/src/metrics/tagfiltertree/trie_children.go @@ -1,51 +1,51 @@ package tagfiltertree type trieChildren[T any] struct { - children []*T - pointerSet PointerSet + Children []*T + PointerSet PointerSet } func newTrieChildren[T any]() trieChildren[T] { return trieChildren[T]{ - children: nil, + Children: nil, } } // Insert inserts a new child with the given byte and data. func (tc *trieChildren[T]) Insert(ch byte, data *T) error { - if tc.pointerSet.IsSet(ch) { + if tc.PointerSet.IsSet(ch) { // already exists. return nil } - newIdx := tc.pointerSet.CountSetBitsUntil(ch) + newIdx := tc.PointerSet.CountSetBitsUntil(ch) // make room for the new child. - tc.children = append(tc.children, nil) + tc.Children = append(tc.Children, nil) - for i := len(tc.children) - 1; i > newIdx; i-- { - tc.children[i] = tc.children[i-1] + for i := len(tc.Children) - 1; i > newIdx; i-- { + tc.Children[i] = tc.Children[i-1] } // set the new data in the correct idx. - tc.children[newIdx] = data + tc.Children[newIdx] = data // set the idx of the new child. - tc.pointerSet.Set(ch) + tc.PointerSet.Set(ch) return nil } func (tc *trieChildren[T]) Get(ch byte) *T { - if !tc.pointerSet.IsSet(ch) { + if !tc.PointerSet.IsSet(ch) { return nil } - childIdx := tc.pointerSet.CountSetBitsUntil(ch) - 1 - return tc.children[childIdx] + childIdx := tc.PointerSet.CountSetBitsUntil(ch) - 1 + return tc.Children[childIdx] } // Exists returns true if the child with the given byte exists. func (tc *trieChildren[T]) Exists(ch byte) bool { - return tc.pointerSet.IsSet(ch) + return tc.PointerSet.IsSet(ch) } From b9368bb831d9a0f65457b10b715400a75e5164b4 Mon Sep 17 00:00:00 2001 From: Shankar Nair Date: Tue, 29 Oct 2024 23:40:00 -0700 Subject: [PATCH 5/8] review comments --- src/metrics/tagfiltertree/tag_filter_tree.go | 6 +++--- src/metrics/tagfiltertree/tag_filter_tree_test.go | 5 +++-- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/src/metrics/tagfiltertree/tag_filter_tree.go b/src/metrics/tagfiltertree/tag_filter_tree.go index c57c4e6751..c345c86915 100644 --- a/src/metrics/tagfiltertree/tag_filter_tree.go +++ b/src/metrics/tagfiltertree/tag_filter_tree.go @@ -195,7 +195,7 @@ func match[T any]( for _, node := range t.Nodes { name := node.Name negate := false - if IsMatchNoneTag(name) { + if IsNegation(name) { name = name[1:] negate = true } @@ -303,8 +303,8 @@ func IsVarTagValue(value string) bool { return false } -// IsMatchNoneTag returns true if the tag is a match none tag. -func IsMatchNoneTag(tagName string) bool { +// IsNegation returns true if the tag is a match none tag. +func IsNegation(tagName string) bool { if len(tagName) == 0 { return false } diff --git a/src/metrics/tagfiltertree/tag_filter_tree_test.go b/src/metrics/tagfiltertree/tag_filter_tree_test.go index fd1054f061..9d02fe9de9 100644 --- a/src/metrics/tagfiltertree/tag_filter_tree_test.go +++ b/src/metrics/tagfiltertree/tag_filter_tree_test.go @@ -308,7 +308,7 @@ func TestTreeGetData(t *testing.T) { rules: []Rule{ { TagFilters: []string{ - "tag1:value1", + "tag1:value1 tag2:value2 tag3:value3", }, Namespace: "namespace1", }, @@ -492,7 +492,8 @@ func TestTreeGetData(t *testing.T) { // check all match actual := make([]*Rule, 0, len(tt.expected)) - _, err := tree.Match(tt.inputTags, &actual) + matched, err := tree.Match(tt.inputTags, &actual) + require.Equal(t, len(tt.expected) > 0, matched) resolved := make([]string, 0, len(actual)) for _, r := range actual { ns, err := r.Resolve(tt.inputTags) From 342322548208409fcbc286ad9e2ff0a620dd227d Mon Sep 17 00:00:00 2001 From: Shankar Nair Date: Wed, 30 Oct 2024 00:21:26 -0700 Subject: [PATCH 6/8] fix bug in Match() logic --- src/metrics/tagfiltertree/tag_filter_tree.go | 44 ++++++++++++++----- .../tagfiltertree/tag_filter_tree_test.go | 24 ++++++++++ 2 files changed, 57 insertions(+), 11 deletions(-) diff --git a/src/metrics/tagfiltertree/tag_filter_tree.go b/src/metrics/tagfiltertree/tag_filter_tree.go index c345c86915..86584ab9b3 100644 --- a/src/metrics/tagfiltertree/tag_filter_tree.go +++ b/src/metrics/tagfiltertree/tag_filter_tree.go @@ -192,6 +192,13 @@ func match[T any]( return false, nil } + var ( + err error + matched bool + matchedAbsSubtree bool + matchedPatternSubtree bool + ) + for _, node := range t.Nodes { name := node.Name negate := false @@ -205,44 +212,59 @@ func match[T any]( if data != nil && *data != nil { *data = append(*data, absVal.Data...) } - if isMatchAny && len(absVal.Data) > 0 { - return true, nil + if len(absVal.Data) > 0 { + // matched data. + if isMatchAny { + return true, nil + } + matched = true } - matched, err := match(absVal.Tree, tags, data, isMatchAny) + matchedAbsSubtree, err = match(absVal.Tree, tags, data, isMatchAny) if err != nil { return false, err } - if isMatchAny && matched { + + if matchedAbsSubtree && isMatchAny { return true, nil } + + matched = matched || matchedAbsSubtree } if tagNameFound != negate { for _, v := range node.PatternValues { - matched, err := v.PatternTrie.Match(tagValue, nil) + valMatched, err := v.PatternTrie.Match(tagValue, nil) if err != nil { return false, err } - if matched { + if valMatched { if data != nil && *data != nil { *data = append(*data, v.Data...) } - if isMatchAny && len(v.Data) > 0 { - return true, nil + if len(v.Data) > 0 { + // matched data. + if isMatchAny { + return true, nil + } + + matched = true } - matched, err := match(v.Tree, tags, data, isMatchAny) + matchedPatternSubtree, err = match(v.Tree, tags, data, isMatchAny) if err != nil { return false, err } - if isMatchAny && matched { + + if matchedPatternSubtree && isMatchAny { return true, nil } + + matched = matched || matchedPatternSubtree } } } } - return false, nil + return matched, nil } // TagsFromTagFilter creates tags from a tag filter. diff --git a/src/metrics/tagfiltertree/tag_filter_tree_test.go b/src/metrics/tagfiltertree/tag_filter_tree_test.go index 9d02fe9de9..b6b1c33a1b 100644 --- a/src/metrics/tagfiltertree/tag_filter_tree_test.go +++ b/src/metrics/tagfiltertree/tag_filter_tree_test.go @@ -52,6 +52,30 @@ func TestTreeGetData(t *testing.T) { rules []Rule expected []string }{ + { + name: "multiple input tags, common prefix tag, absolute and wildcard, match", + inputTags: map[string]string{ + "tag1": "value1", + "tag2": "value2", + "tag3": "apple", + "tag4": "banana", + }, + rules: []Rule{ + { + TagFilters: []string{ + "tag1:value1 tag2:value2 tag3:apple", + }, + Namespace: "namespace1", + }, + { + TagFilters: []string{ + "tag1:val* tag2:value2 tag4:banana", + }, + Namespace: "namespace2", + }, + }, + expected: []string{"namespace1", "namespace2"}, + }, { name: "multiple input tags, multiple filters, multiple rules, match", inputTags: map[string]string{ From c65aa85a6bf4d0ac1c9c5ff18230c5dab061e442 Mon Sep 17 00:00:00 2001 From: Shankar Nair Date: Wed, 30 Oct 2024 09:58:11 -0700 Subject: [PATCH 7/8] remove golint since it is deprecated. It was causing issues with detecting generic types --- .golangci.yml | 1 - 1 file changed, 1 deletion(-) diff --git a/.golangci.yml b/.golangci.yml index 452c27ee57..c244fd4dad 100644 --- a/.golangci.yml +++ b/.golangci.yml @@ -186,7 +186,6 @@ linters: - gci - goconst - gocritic - - golint - gosimple - govet - ineffassign From 41769a231de79b6beefec1e6b9b3bb6401ebb93b Mon Sep 17 00:00:00 2001 From: Shankar Nair Date: Wed, 30 Oct 2024 17:05:14 -0700 Subject: [PATCH 8/8] update README.md --- src/metrics/tagfiltertree/README.md | 58 ++++++++++++++++++++++++++--- 1 file changed, 52 insertions(+), 6 deletions(-) diff --git a/src/metrics/tagfiltertree/README.md b/src/metrics/tagfiltertree/README.md index 2f363552bf..d9861fe020 100644 --- a/src/metrics/tagfiltertree/README.md +++ b/src/metrics/tagfiltertree/README.md @@ -2,10 +2,24 @@ ## Motivation There are many instances where we want to match an input metricID against -a set of tag filters. One such use-case is metric attribution to namespaces. -Iterating through each filter individually and matching them is extremely expensive -since it has to be done on each incoming metricID. Therefore, this data structure -pre-compiles a set of tag filters in order to optimize matches against an input metricID. +a set of tag filter patterns. Iterating through each filter individually and matching +them is extremely expensive since it has to be done on each incoming metricID. +One example is you want to drop incoming metricIDs that matches any +of the configured tag filter patterns. Instead of iterating over all configured tag +filter patterns, they could be structured as a tree and then quickly prune non-relevant +tag filter patterns from being matched and thereby also reducing the reducdancy in matching +the same tag filters over and over that are common between the tag filter patterns. + +Example: +Consider a set of tag filter patterns: +1. service:foo env:prod* name:num_requests +2. service:foo env:staging* name:*num_errors + +When an input metricID comes in: +service:foo env:staging-123 name:login.num_errors +it would be wasteful to match "service:foo" again. Also once we know that "env:staging*" +matched then we only need to look at the "name:*num_errors" to find a match for the +input metricID. ## Usage First create a trie using New() and then add tagFilters using AddTagFilter(). @@ -15,8 +29,40 @@ and in the same order. For instance, in case you have a tag "service" which you anticipate to be present in all filters then make sure that is specified first and then specify the remaining tags in the filter. -The trie also supports "*" for a tag value which can be used to ensure the existance of a tag -in the input metricID. +So basically re-order the tags filters in the tag filter pattern based on the popularity +of the tag filter. + +There are two ways to use the Match() API. +1. Match All +When calling AddTagFilter() you can attach data to that tag filter pattern. This data +can be retreived during Match(). Consequently, a Match() call with an input metricID +can match multiple tag filter patterns thus multiple data. The 2nd parameter passed to +Match() is a data slice to hold the matched data. + +2. Match Any +In case we are simply interested in knowing whether the input metricID matched any of the +tag filter patterns and don't really care about which in particular then we can call Match() +with the 2nd parameter as nil. This results an even faster code path than the one above. + +```go + tree := New[*Rule]() + for _, rule := range tt.rules { + for _, tagFilterPattern := range rule.TagFiltersPatterns { + err := tree.AddTagFilter(tagFilterPattern, &rule) + require.NoError(t, err) + } + } + + ... + // check match all + data := make([]*Rule, 0) + matched, err := tree.Match(inputTags, &data) + + ... + // check match any + anyMatched, err := tree.Match(inputTags, nil) + ... +``` ## Caveats The trie might return duplicates and it is up to the caller to de-dup the results.