diff --git a/.golangci.yml b/.golangci.yml index 452c27ee57..c244fd4dad 100644 --- a/.golangci.yml +++ b/.golangci.yml @@ -186,7 +186,6 @@ linters: - gci - goconst - gocritic - - golint - gosimple - govet - ineffassign diff --git a/src/metrics/tagfiltertree/README.md b/src/metrics/tagfiltertree/README.md new file mode 100644 index 0000000000..d9861fe020 --- /dev/null +++ b/src/metrics/tagfiltertree/README.md @@ -0,0 +1,68 @@ +# Tag Filter Tree + +## Motivation +There are many instances where we want to match an input metricID against +a set of tag filter patterns. Iterating through each filter individually and matching +them is extremely expensive since it has to be done on each incoming metricID. +One example is you want to drop incoming metricIDs that matches any +of the configured tag filter patterns. Instead of iterating over all configured tag +filter patterns, they could be structured as a tree and then quickly prune non-relevant +tag filter patterns from being matched and thereby also reducing the reducdancy in matching +the same tag filters over and over that are common between the tag filter patterns. + +Example: +Consider a set of tag filter patterns: +1. service:foo env:prod* name:num_requests +2. service:foo env:staging* name:*num_errors + +When an input metricID comes in: +service:foo env:staging-123 name:login.num_errors +it would be wasteful to match "service:foo" again. Also once we know that "env:staging*" +matched then we only need to look at the "name:*num_errors" to find a match for the +input metricID. + +## Usage +First create a trie using New() and then add tagFilters using AddTagFilter(). +The tags within a filter can be specified in any order but to condense the compiled +output of the trie, try and specify the most common set of tags in the beginning +and in the same order. +For instance, in case you have a tag "service" which you anticipate to be present +in all filters then make sure that is specified first and then specify the remaining tags +in the filter. +So basically re-order the tags filters in the tag filter pattern based on the popularity +of the tag filter. + +There are two ways to use the Match() API. +1. Match All +When calling AddTagFilter() you can attach data to that tag filter pattern. This data +can be retreived during Match(). Consequently, a Match() call with an input metricID +can match multiple tag filter patterns thus multiple data. The 2nd parameter passed to +Match() is a data slice to hold the matched data. + +2. Match Any +In case we are simply interested in knowing whether the input metricID matched any of the +tag filter patterns and don't really care about which in particular then we can call Match() +with the 2nd parameter as nil. This results an even faster code path than the one above. + +```go + tree := New[*Rule]() + for _, rule := range tt.rules { + for _, tagFilterPattern := range rule.TagFiltersPatterns { + err := tree.AddTagFilter(tagFilterPattern, &rule) + require.NoError(t, err) + } + } + + ... + // check match all + data := make([]*Rule, 0) + matched, err := tree.Match(inputTags, &data) + + ... + // check match any + anyMatched, err := tree.Match(inputTags, nil) + ... +``` + +## Caveats +The trie might return duplicates and it is up to the caller to de-dup the results. diff --git a/src/metrics/tagfiltertree/options.go b/src/metrics/tagfiltertree/options.go new file mode 100644 index 0000000000..08be319591 --- /dev/null +++ b/src/metrics/tagfiltertree/options.go @@ -0,0 +1,30 @@ +package tagfiltertree + +import "github.com/m3db/m3/src/metrics/filters" + +// Options is a set of options for the attributor. +type Options interface { + TagFilterOptions() filters.TagsFilterOptions + SetTagFilterOptions(tf filters.TagsFilterOptions) Options +} + +type options struct { + tagFilterOptions filters.TagsFilterOptions +} + +// NewOptions creates a new set of options. +func NewOptions() Options { + return &options{} +} + +// TagFilterOptions returns the tag filter options. +func (o *options) TagFilterOptions() filters.TagsFilterOptions { + return o.tagFilterOptions +} + +// SetTagFilterOptions sets the tag filter options. +func (o *options) SetTagFilterOptions(tf filters.TagsFilterOptions) Options { + opts := *o + opts.tagFilterOptions = tf + return &opts +} diff --git a/src/metrics/tagfiltertree/pointer_set.go b/src/metrics/tagfiltertree/pointer_set.go new file mode 100644 index 0000000000..fbf0c3f3ad --- /dev/null +++ b/src/metrics/tagfiltertree/pointer_set.go @@ -0,0 +1,40 @@ +package tagfiltertree + +import "math/bits" + +// PointerSet is a set of pointers backed by a bitmap to +// represent a sparse set of at most 127 pointers. +type PointerSet struct { + bits [2]uint64 // Using 2 uint64 gives us 128 bits (0 to 127). +} + +// Set adds a pointer at index i (0 <= i < 127). +func (ps *PointerSet) Set(i byte) { + if i < 64 { + ps.bits[0] |= (1 << i) + } else { + ps.bits[1] |= (1 << (i - 64)) + } +} + +// IsSet checks if a pointer is present at index i. +func (ps *PointerSet) IsSet(i byte) bool { + if i < 64 { + return ps.bits[0]&(1<>= 1 + } else { + if r&0x1 == 1 { + ps.Set(i) + } + r >>= 1 + } + } + + require.Equal(t, tt.expected, ps.CountSetBitsUntil(127)) + }) + } +} diff --git a/src/metrics/tagfiltertree/tag_filter_tree.go b/src/metrics/tagfiltertree/tag_filter_tree.go new file mode 100644 index 0000000000..86584ab9b3 --- /dev/null +++ b/src/metrics/tagfiltertree/tag_filter_tree.go @@ -0,0 +1,339 @@ +package tagfiltertree + +import ( + "fmt" + "strings" +) + +const ( + _matchall = "*" + _matchNone = "!*" +) + +// Tag represents a tag with name, value and variable. +type Tag struct { + Name string + Val string + Var string +} + +// Resolvable is an interface for types that can be stored in the tree. +type Resolvable[R any] interface { + // Resolve resolves the data based on the tags and returns + // the data with type R. + Resolve(tags map[string]string) (R, error) +} + +// Tree is a tree data structure for tag filters. +type Tree[T any] struct { + Nodes []*node[T] +} + +// New creates a new tree. +func New[T any]() *Tree[T] { + return &Tree[T]{ + Nodes: make([]*node[T], 0), + } +} + +// AddTagFilter adds a tag filter to the tree. +func (t *Tree[T]) AddTagFilter(tagFilter string, data T) error { + tags, err := TagsFromTagFilter(tagFilter) + if err != nil { + return err + } + return addNode(t, tags, 0, data) +} + +// Match returns the data for the given tags. +func (t *Tree[T]) Match(tags map[string]string, data *[]T) (bool, error) { + isMatchAny := false + if data == nil || *data == nil { + isMatchAny = true + } + return match(t, tags, data, isMatchAny) +} + +// NodeValue represents a value in a node of the Tree. +type NodeValue[T any] struct { + Val string + PatternTrie *Trie[T] + Tree *Tree[T] + Data []T +} + +type node[T any] struct { + Name string + AbsoluteValues map[string]NodeValue[T] + PatternValues []NodeValue[T] +} + +func (n *node[T]) addValue(filter string, data *T) (*Tree[T], error) { + if IsAbsoluteValue(filter) { + if n.AbsoluteValues == nil { + n.AbsoluteValues = make(map[string]NodeValue[T], 0) + } + if v, found := n.AbsoluteValues[filter]; found { + if data != nil { + if v.Data == nil { + v.Data = make([]T, 0) + } + v.Data = append(v.Data, *data) + n.AbsoluteValues[filter] = v + } + return v.Tree, nil + } + + newNodeValue := NewNodeValue[T](filter, nil, data) + n.AbsoluteValues[filter] = newNodeValue + + return newNodeValue.Tree, nil + } + + if n.PatternValues == nil { + n.PatternValues = make([]NodeValue[T], 0) + } + for i, v := range n.PatternValues { + if v.Val == filter { + if data != nil { + if v.Data == nil { + v.Data = make([]T, 0) + } + v.Data = append(v.Data, *data) + n.PatternValues[i] = v + } + return v.Tree, nil + } + } + + t := NewTrie[T]() + err := t.Insert(filter, nil) + if err != nil { + return nil, err + } + + newNodeValue := NewNodeValue[T](filter, t, data) + n.PatternValues = append(n.PatternValues, newNodeValue) + + return newNodeValue.Tree, nil +} + +// NewNodeValue creates a new NodeValue. +func NewNodeValue[T any](val string, patternTrie *Trie[T], data *T) NodeValue[T] { + t := &Tree[T]{ + Nodes: make([]*node[T], 0), + } + + v := NodeValue[T]{ + Val: val, + PatternTrie: patternTrie, + Tree: t, + Data: nil, + } + + if data != nil { + if v.Data == nil { + v.Data = make([]T, 0) + } + v.Data = append(v.Data, *data) + } + + return v +} + +func addNode[T any](t *Tree[T], tags []Tag, idx int, data T) error { + if idx >= len(tags) { + return nil + } + + tag := tags[idx] + nodeIdx := -1 + for i, t := range t.Nodes { + if t.Name == tag.Name { + nodeIdx = i + break + } + } + if nodeIdx == -1 { + t.Nodes = append(t.Nodes, &node[T]{ + Name: tag.Name, + }) + nodeIdx = len(t.Nodes) - 1 + } + node := t.Nodes[nodeIdx] + + // Add the data to the childTree if this is the last tag. + var dataToAdd *T + if idx == len(tags)-1 { + dataToAdd = &data + } + + childTree, err := node.addValue(tag.Val, dataToAdd) + if err != nil { + return err + } + + // Recurse to the next tag. + if err := addNode(childTree, tags, idx+1, data); err != nil { + // TODO: perform cleanup to avoid partially added nodes. + return err + } + + return nil +} + +func match[T any]( + t *Tree[T], + tags map[string]string, + data *[]T, + isMatchAny bool, +) (bool, error) { + if len(tags) == 0 || t == nil { + return false, nil + } + + var ( + err error + matched bool + matchedAbsSubtree bool + matchedPatternSubtree bool + ) + + for _, node := range t.Nodes { + name := node.Name + negate := false + if IsNegation(name) { + name = name[1:] + negate = true + } + tagValue, tagNameFound := tags[name] + absVal, absValFound := node.AbsoluteValues[tagValue] + if tagNameFound && absValFound { + if data != nil && *data != nil { + *data = append(*data, absVal.Data...) + } + if len(absVal.Data) > 0 { + // matched data. + if isMatchAny { + return true, nil + } + matched = true + } + + matchedAbsSubtree, err = match(absVal.Tree, tags, data, isMatchAny) + if err != nil { + return false, err + } + + if matchedAbsSubtree && isMatchAny { + return true, nil + } + + matched = matched || matchedAbsSubtree + } + if tagNameFound != negate { + for _, v := range node.PatternValues { + valMatched, err := v.PatternTrie.Match(tagValue, nil) + if err != nil { + return false, err + } + if valMatched { + if data != nil && *data != nil { + *data = append(*data, v.Data...) + } + if len(v.Data) > 0 { + // matched data. + if isMatchAny { + return true, nil + } + + matched = true + } + matchedPatternSubtree, err = match(v.Tree, tags, data, isMatchAny) + if err != nil { + return false, err + } + + if matchedPatternSubtree && isMatchAny { + return true, nil + } + + matched = matched || matchedPatternSubtree + } + } + } + } + + return matched, nil +} + +// TagsFromTagFilter creates tags from a tag filter. +// The tag values can be of the format: +// "foo" OR "{foo,bar,baz}" OR "{{Variable}}" +// There cannot be a mix of value formats like "simpleValue, {foo,bar}" etc. +func TagsFromTagFilter(tf string) ([]Tag, error) { + tags := make([]Tag, 0) + tfSanitized := strings.TrimSpace(tf) + tagValuePairs := strings.Split(tfSanitized, " ") + for _, tagValuePair := range tagValuePairs { + tagAndValue := strings.Split(tagValuePair, ":") + if len(tagAndValue) != 2 { + return nil, fmt.Errorf("invalid tag filter: %s", tf) + } + tag := tagAndValue[0] + val := tagAndValue[1] + if len(tag) == 0 || len(val) == 0 { + return nil, fmt.Errorf("invalid tag filter: %s", tf) + } + + varName := "" + if IsVarTagValue(val) { + varName = val + val = _matchall + } + if val == _matchNone { + tag = "!" + tag + val = _matchall + } + + tags = append(tags, Tag{ + Name: tag, + Val: val, + Var: varName, + }) + } + + return tags, nil +} + +// IsVarTagValue returns true if the value is a variable tag value. +func IsVarTagValue(value string) bool { + if len(value) < 4 { + return false + } + + for i := 0; i < len(value); i++ { + if value[i] == '{' { + if i+1 < len(value) && value[i+1] == '{' { + if strings.Contains(value[i+1:], "}}") { + return true + } + } + } + } + + return false +} + +// IsNegation returns true if the tag is a match none tag. +func IsNegation(tagName string) bool { + if len(tagName) == 0 { + return false + } + return tagName[0] == '!' +} + +// IsAbsoluteValue returns true if the value is an absolute value. +func IsAbsoluteValue(val string) bool { + return !strings.ContainsAny(val, "{!*[") +} diff --git a/src/metrics/tagfiltertree/tag_filter_tree_benchmark_test.go b/src/metrics/tagfiltertree/tag_filter_tree_benchmark_test.go new file mode 100644 index 0000000000..06a60a8761 --- /dev/null +++ b/src/metrics/tagfiltertree/tag_filter_tree_benchmark_test.go @@ -0,0 +1,110 @@ +package tagfiltertree + +import ( + "fmt" + "math/rand" + "strings" + "testing" + "time" + + "github.com/influxdata/influxdb/pkg/testing/assert" +) + +const ( + _numFilters = 100 +) + +// Predefined tag names and values for benchmarks +var tags = []string{"tag1", "tag2", "tag3", "tag4", "tag5", "tag6", "tag7", "tag8", "tag9", "tag10"} +var values = []string{"value1", "value2", "value3", "value4", "value5"} + +// Filter Types +const ( + ABSOLUTE = 0 + WILDCARD = 1 + NEGATION = 2 + COMPOSITE = 3 +) + +/* +* Benchmark results: +* BenchmarkTagFilterTreeMatch/MatchAll-10 673851 1561 ns/op 190 B/op 0 allocs/op +* BenchmarkTagFilterTreeMatch/MatchAny-10 4203621 256.0 ns/op 0 B/op 0 allocs/op + */ +func BenchmarkTagFilterTreeMatch(b *testing.B) { + // Generate a benchmark set of tag filters with varying complexity + filters := make([]string, 0) + for i := 0; i < _numFilters; i++ { + filters = append(filters, generateTagFilter(1+rnd.Intn(len(tags)-1))) // 10 filters in the string + } + input := map[string]string{ + "tag1": "value1", + "tag2": "value2", + "tag3": "value3", + "tag4": "value4", + "tag5": "value5", + } + tree := New[*Rule]() + data := &Rule{} + + for _, f := range filters { + err := tree.AddTagFilter(f, data) + assert.NoError(b, err) + } + + b.ResetTimer() + b.Run("MatchAll", func(b *testing.B) { + resultData := make([]*Rule, 0, 10) + for i := 0; i < b.N; i++ { + _, _ = tree.Match(input, &resultData) + } + }) + + b.ResetTimer() + b.Run("MatchAny", func(b *testing.B) { + for i := 0; i < b.N; i++ { + _, _ = tree.Match(input, nil) + } + }) +} + +// Seeded random number generator for deterministic results +var rnd = rand.New(rand.NewSource(time.Now().UnixNano())) + +// Generate a single random tag filter based on type +func generateTagValuePair(idx int, tagType int) string { + tag := tags[idx] + switch tagType { + case ABSOLUTE: + value := values[rnd.Intn(len(values))] + return fmt.Sprintf("%s:%s", tag, value) + case WILDCARD: + return fmt.Sprintf("%s:*", tag) + case NEGATION: + if rnd.Intn(2) == 0 { + return fmt.Sprintf("%s:!*", tag) // tag should not exist + } + + value := values[rnd.Intn(len(values))] + return fmt.Sprintf("%s:!%s", tag, value) // tag should not have this value + case COMPOSITE: + value1 := values[rnd.Intn(len(values))] + value2 := values[rnd.Intn(len(values))] + return fmt.Sprintf("%s:{%s,%s}", tag, value1, value2) // matches either value1 or value2 + default: + return "" + } +} + +// Generate a tag filter with the specified number of tags. +func generateTagFilter(numTags int) string { + var filter []string + + idxs := rnd.Perm(len(tags)) + for i := 0; i < numTags; i++ { + tagType := rnd.Intn(4) // Randomly choose from ABSOLUTE, WILDCARD, NEGATION, COMPOSITE + pair := generateTagValuePair(idxs[i], tagType) + filter = append(filter, pair) + } + return strings.Join(filter, " ") +} diff --git a/src/metrics/tagfiltertree/tag_filter_tree_test.go b/src/metrics/tagfiltertree/tag_filter_tree_test.go new file mode 100644 index 0000000000..b6b1c33a1b --- /dev/null +++ b/src/metrics/tagfiltertree/tag_filter_tree_test.go @@ -0,0 +1,556 @@ +package tagfiltertree + +import ( + "errors" + "strings" + "testing" + + "github.com/google/go-cmp/cmp" + "github.com/google/go-cmp/cmp/cmpopts" + "github.com/stretchr/testify/require" +) + +type Rule struct { + TagFilters []string + Namespace string +} + +func (r *Rule) Resolve(inputTags map[string]string) (string, error) { + if !IsVarTagValue(r.Namespace) { + return r.Namespace, nil + } + + varMap := make(map[string]string) + for _, tagFilter := range r.TagFilters { + tags, err := TagsFromTagFilter(tagFilter) + if err != nil { + return "", err + } + for _, tag := range tags { + if tag.Var != "" { + // is var tag + inputTagValue, ok := inputTags[tag.Name] + if !ok { + return "", errors.New("tag not found") + } + varMap[tag.Var] = inputTagValue + } + } + } + + ns := r.Namespace + for varName, varValue := range varMap { + ns = strings.ReplaceAll(ns, varName, varValue) + } + return ns, nil +} + +func TestTreeGetData(t *testing.T) { + tests := []struct { + name string + inputTags map[string]string + rules []Rule + expected []string + }{ + { + name: "multiple input tags, common prefix tag, absolute and wildcard, match", + inputTags: map[string]string{ + "tag1": "value1", + "tag2": "value2", + "tag3": "apple", + "tag4": "banana", + }, + rules: []Rule{ + { + TagFilters: []string{ + "tag1:value1 tag2:value2 tag3:apple", + }, + Namespace: "namespace1", + }, + { + TagFilters: []string{ + "tag1:val* tag2:value2 tag4:banana", + }, + Namespace: "namespace2", + }, + }, + expected: []string{"namespace1", "namespace2"}, + }, + { + name: "multiple input tags, multiple filters, multiple rules, match", + inputTags: map[string]string{ + "tag1": "value1", + "tag2": "value2", + "tag3": "value3", + "tag4": "value4", + "tag5": "value5", + }, + rules: []Rule{ + { + TagFilters: []string{ + "tag1:value1 tag2:value2", + }, + Namespace: "namespace1", + }, + { + TagFilters: []string{ + "tag4:value4 tag5:value5", + }, + Namespace: "namespace2", + }, + { + TagFilters: []string{ + "tag5:*", + }, + Namespace: "namespace3", + }, + { + TagFilters: []string{ + "tag8:value8 tag9:value9", + "tag5:value5 tag6:value6", + }, + Namespace: "namespace4", + }, + }, + expected: []string{"namespace1", "namespace2", "namespace3"}, + }, + { + name: "multiple input tags, common prefix tags, match one", + inputTags: map[string]string{ + "tag1": "value1", + "tag2": "value2", + "tag3": "apple", + }, + rules: []Rule{ + { + TagFilters: []string{ + "tag1:value1 tag2:value2 tag3:apple", + }, + Namespace: "namespace1", + }, + { + TagFilters: []string{ + "tag1:value1 tag2:value2 tag3:banana", + }, + Namespace: "namespace2", + }, + }, + expected: []string{"namespace1"}, + }, + { + name: "multiple input tags, common prefix tags, existing value, match one", + inputTags: map[string]string{ + "tag1": "value1", + "tag2": "value2", + "tag3": "apple", + }, + rules: []Rule{ + { + TagFilters: []string{ + "tag1:value1 tag2:value2 tag3:apple tag4:banana", + }, + Namespace: "namespace1", + }, + { + TagFilters: []string{ + "tag1:value1 tag2:value2 tag3:apple", + }, + Namespace: "namespace2", + }, + }, + expected: []string{"namespace2"}, + }, + { + name: "multiple input tags, common prefix tags, existing value with wildcard, match one", + inputTags: map[string]string{ + "tag1": "value1", + "tag2": "value2", + "tag3": "apple", + }, + rules: []Rule{ + { + TagFilters: []string{ + "tag1:value1 tag2:value2 tag3:* tag4:banana", + }, + Namespace: "namespace1", + }, + { + TagFilters: []string{ + "tag1:value1 tag2:value2 tag3:*", + }, + Namespace: "namespace2", + }, + }, + expected: []string{"namespace2"}, + }, + { + name: "multiple input tags, multiple filters, multiple rules, multiple values, match", + inputTags: map[string]string{ + "tag1": "value1", + "tag2": "value2", + "tag3": "value3", + "tag4": "value4", + "tag5": "value5", + }, + rules: []Rule{ + { + TagFilters: []string{ + "tag1:value1 tag2:value2", + }, + Namespace: "namespace1", + }, + { + TagFilters: []string{ + "tag1:val1 tag4:value4 tag5:value5", + }, + Namespace: "namespaceA", + }, + { + TagFilters: []string{ + "tag1:val* tag4:value4 tag5:value5", + }, + Namespace: "namespace2", + }, + { + TagFilters: []string{ + "tag5:*", + }, + Namespace: "namespace3", + }, + { + TagFilters: []string{ + "tag8:value8 tag9:value9", + "tag5:value5 tag6:value6", + }, + Namespace: "namespace4", + }, + }, + expected: []string{"namespace1", "namespace2", "namespace3"}, + }, + { + name: "single rule, all negation, match", + inputTags: map[string]string{ + "tag1": "value1", + "tag2": "value2", + "tag4": "value4", + "tag5": "value5", + }, + rules: []Rule{ + { + TagFilters: []string{ + "tag3:!*", + }, + Namespace: "namespace1", + }, + }, + expected: []string{"namespace1"}, + }, + { + name: "single rule, all negation, no match", + inputTags: map[string]string{ + "tag1": "value1", + "tag2": "value2", + "tag4": "value4", + "tag5": "value5", + }, + rules: []Rule{ + { + TagFilters: []string{ + "tag2:!*", + }, + Namespace: "namespace1", + }, + }, + expected: []string{}, + }, + { + name: "single rule, negation, match", + inputTags: map[string]string{ + "tag1": "value1", + "tag2": "value2", + "tag3": "value3", + "tag4": "value4", + "tag5": "value5", + }, + rules: []Rule{ + { + TagFilters: []string{ + "tag3:!{value1,value2}", + }, + Namespace: "namespace1", + }, + }, + expected: []string{"namespace1"}, + }, + { + name: "multiple rules, negation, single match", + inputTags: map[string]string{ + "tag1": "value1", + "tag2": "value2", + "tag3": "value3", + "tag4": "value4", + "tag5": "value5", + }, + rules: []Rule{ + { + TagFilters: []string{ + "tag1:!* tag2:value2", + }, + Namespace: "namespace1", + }, + { + TagFilters: []string{ + "tag1:value1 tag6:!*", + }, + Namespace: "namespace2", + }, + }, + expected: []string{"namespace2"}, + }, + { + name: "single input tag, single filter, match", + inputTags: map[string]string{ + "tag1": "value1", + }, + rules: []Rule{ + { + TagFilters: []string{ + "tag1:value1", + }, + Namespace: "namespace1", + }, + }, + expected: []string{"namespace1"}, + }, + { + name: "multiple input tags, single filter, match", + inputTags: map[string]string{ + "tag1": "value1", + "tag2": "value2", + "tag3": "value3", + }, + rules: []Rule{ + { + TagFilters: []string{ + "tag1:value1 tag2:value2 tag3:value3", + }, + Namespace: "namespace1", + }, + }, + expected: []string{"namespace1"}, + }, + { + name: "single input tag, multiple filters, match", + inputTags: map[string]string{ + "tag1": "value1", + }, + rules: []Rule{ + { + TagFilters: []string{ + "tag1:value1", + "tag2:value2", + }, + Namespace: "namespace1", + }, + }, + expected: []string{"namespace1"}, + }, + { + name: "single input tag, multiple filters, no match", + inputTags: map[string]string{ + "tag1": "value1", + }, + rules: []Rule{ + { + TagFilters: []string{ + "tag3:value1", + "tag2:value2", + }, + Namespace: "namespace1", + }, + }, + expected: nil, + }, + { + name: "multiple input tags, multiple filters, match", + inputTags: map[string]string{ + "tag1": "value1", + "tag2": "value2", + "tag3": "value3", + }, + rules: []Rule{ + { + TagFilters: []string{ + "tag1:value1 tag2:value2", + "tag4:value2 tag5:value2", + }, + Namespace: "namespace1", + }, + }, + expected: []string{"namespace1"}, + }, + { + name: "multiple input tags, multiple filters, no match", + inputTags: map[string]string{ + "tag3": "value3", + "tag4": "value4", + }, + rules: []Rule{ + { + TagFilters: []string{ + "tag1:value1 tag2:value2", + "tag5:value5 tag6:value6", + }, + Namespace: "namespace1", + }, + { + TagFilters: []string{ + "tag7:*", + }, + Namespace: "namespace2", + }, + }, + expected: nil, + }, + { + name: "multiple input tags, composite tag value, match", + inputTags: map[string]string{ + "tag3": "apple", + "tag4": "banana", + }, + rules: []Rule{ + { + TagFilters: []string{ + "tag3:{value3,apple} tag4:{value4,banana}", + "tag5:value5 tag6:value6", + }, + Namespace: "namespace1", + }, + }, + expected: []string{"namespace1"}, + }, + { + name: "multiple input tags, var tag value, single rule match", + inputTags: map[string]string{ + "tag3": "apple", + "tag4": "banana", + }, + rules: []Rule{ + { + TagFilters: []string{ + "tag3:{{VAR1}} tag4:{{VAR2}}", + "tag5:value5 tag6:value6", + }, + Namespace: "{{VAR1}}_{{VAR2}}", + }, + }, + expected: []string{"apple_banana"}, + }, + { + name: "multiple input tags, var tag value, multi rule match", + inputTags: map[string]string{ + "tag3": "apple", + "tag4": "banana", + "tag5": "train", + "tag6": "car", + }, + rules: []Rule{ + { + TagFilters: []string{ + "tag3:{{VAR1}} tag4:{{VAR2}}", + "tag5:value5 tag6:value6", + }, + Namespace: "namespace1_{{VAR1}}_{{VAR2}}", + }, + { + TagFilters: []string{ + "tag4:{{VAR5}} tag5:{{VAR6}} tag6:{{VAR2}}", + "tag5:value5 tag7:value7", + }, + Namespace: "{{VAR6}}_{{VAR6}}_{{VAR2}}_{{VAR5}}", + }, + }, + expected: []string{ + "namespace1_apple_banana", + "train_train_car_banana", + }, + }, + { + name: "multiple input tags, wildcard tag value, multi rule match", + inputTags: map[string]string{ + "tag3": "apple", + "tag4": "banana", + "tag5": "train", + "tag6": "car", + }, + rules: []Rule{ + { + TagFilters: []string{ + "tag3:app* tag4:ban*", + "tag5:train tag6:value6", + }, + Namespace: "namespace1", + }, + { + TagFilters: []string{ + "tag4:bag* tag5:* tag6:c*", + "tag5:value5 tag7:value7", + }, + Namespace: "namespace2", + }, + }, + expected: []string{"namespace1"}, + }, + } + + less := func(a, b string) bool { return a < b } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + tree := New[*Rule]() + for _, rule := range tt.rules { + for _, tagFilter := range rule.TagFilters { + err := tree.AddTagFilter(tagFilter, &rule) + require.NoError(t, err) + } + } + + // check all match + actual := make([]*Rule, 0, len(tt.expected)) + matched, err := tree.Match(tt.inputTags, &actual) + require.Equal(t, len(tt.expected) > 0, matched) + resolved := make([]string, 0, len(actual)) + for _, r := range actual { + ns, err := r.Resolve(tt.inputTags) + require.NoError(t, err) + resolved = append(resolved, ns) + } + + require.NoError(t, err) + if len(tt.expected) == 0 { + require.Empty(t, actual) + return + } + + actualNamespaces := uniqueNamespaces(resolved) + require.Equal(t, "", cmp.Diff(tt.expected, actualNamespaces, cmpopts.SortSlices(less))) + + // check any match + anyMatched, err := tree.Match(tt.inputTags, nil) + require.NoError(t, err) + require.Equal(t, len(tt.expected) > 0, anyMatched) + }) + } +} + +func uniqueNamespaces(input []string) []string { + unique := make(map[string]struct{}) + for _, r := range input { + unique[r] = struct{}{} + } + + output := make([]string, 0, len(unique)) + for ns := range unique { + output = append(output, ns) + } + return output +} diff --git a/src/metrics/tagfiltertree/trie.go b/src/metrics/tagfiltertree/trie.go new file mode 100644 index 0000000000..a1b1cf5de0 --- /dev/null +++ b/src/metrics/tagfiltertree/trie.go @@ -0,0 +1,356 @@ +package tagfiltertree + +import ( + "fmt" + "strings" +) + +// trieNode represents a node in the trie. +type trieNode[T any] struct { + Ch trieChar + Children trieChildren[trieNode[T]] + Data []T +} + +// newEmptyNode creates a new empty node with the given character. +func newEmptyNode[T any](ch byte) *trieNode[T] { + return &trieNode[T]{ + Ch: newTrieChar(ch), + Children: newTrieChildren[trieNode[T]](), + Data: nil, + } +} + +// Trie represents a trie data structure. +type Trie[T any] struct { + Root *trieNode[T] +} + +// NewTrie creates a new trie. +func NewTrie[T any]() *Trie[T] { + return &Trie[T]{ + Root: nil, + } +} + +// Insert inserts a new pattern and data into the trie. +func (tr *Trie[T]) Insert(pattern string, data *T) error { + if err := ValidatePattern(pattern); err != nil { + return err + } + + if tr.Root == nil { + tr.Root = newEmptyNode[T]('^') + } + + return insertHelper(tr.Root, pattern, 0, len(pattern), data) +} + +// ValidatePattern validates the given pattern. +func ValidatePattern(pattern string) error { + if len(pattern) == 0 { + return fmt.Errorf("empty pattern") + } + + if strings.Contains(pattern, "**") { + return fmt.Errorf("invalid pattern") + } + + if strings.Contains(pattern, "{") != strings.Contains(pattern, "}") { + return fmt.Errorf("invalid pattern") + } + + // pattern cannot have more than one pair of '{' and '}'. + if strings.Count(pattern, "{") > 1 || strings.Count(pattern, "}") > 1 { + return fmt.Errorf("multiple composite patterns not supported") + } + + openIdx := strings.Index(pattern, "{") + if openIdx != -1 { + closeIdx := strings.Index(pattern, "}") + if closeIdx != -1 { + // we do not support negation within composite patterns. + if strings.Contains(pattern[openIdx:closeIdx], "!") { + return fmt.Errorf("negation not supported in composite patterns") + } + } + } + + if strings.Contains(pattern, "[") != strings.Contains(pattern, "]") { + return fmt.Errorf("invalid pattern") + } + + // at the moment we only support single char ranges. + if strings.Count(pattern, "[") > 1 || strings.Count(pattern, "]") > 1 { + return fmt.Errorf("multiple char range patterns not supported") + } + + // we do not support certain special chars in the pattern. + if strings.ContainsAny(pattern, "?") { + return fmt.Errorf("invalid special chars in pattern") + } + + for _, ch := range pattern { + if ch > 127 { + return fmt.Errorf("invalid character in pattern %c", ch) + } + } + + return nil +} + +func insertHelper[T any]( + root *trieNode[T], + pattern string, + startIdx int, + endIdx int, + data *T, +) error { + if root == nil { + return nil + } + + if startIdx == endIdx { + return nil + } + + var ( + err error + options []string + ) + + if pattern[startIdx] == '{' { + options, err = ParseCompositePattern(pattern[startIdx:]) + if err != nil { + return err + } + } + + if pattern[startIdx] == '[' { + options, err = ParseCharRange(pattern[startIdx:]) + if err != nil { + return err + } + } + + for _, option := range options { + if err := insertHelper( + root, + option, + 0, + len(option), + data, + ); err != nil { + return err + } + } + + if len(options) > 0 { + // matched composite or char range pattern. + return nil + } + + var node *trieNode[T] + if root.Children.Exists(pattern[startIdx]) { + node = root.Children.Get(pattern[startIdx]) + } + if node == nil { + node = newEmptyNode[T](pattern[startIdx]) + } + + if node.Ch.Char() == '!' { + if data != nil { + node.Data = append(node.Data, *data) + } + } + + if err := root.Children.Insert(pattern[startIdx], node); err != nil { + return err + } + + if startIdx == len(pattern)-1 { + // found the leaf node. + node.Ch.SetEnd() + if data != nil { + node.Data = append(node.Data, *data) + } + } + + return insertHelper(node, pattern, startIdx+1, endIdx, data) +} + +// Match matches the given input against the trie. +func (tr *Trie[T]) Match(input string, data *[]T) (bool, error) { + return matchHelper(tr.Root, input, 0, data) +} + +func matchHelper[T any]( + root *trieNode[T], + input string, + startIdx int, + data *[]T, +) (bool, error) { + if root == nil { + return false, nil + } + + if startIdx == len(input) { + // looks for patterns that end with a '*'. + var child *trieNode[T] + if root.Children.Exists('*') { + child = root.Children.Get('*') + } + if child != nil && child.Ch.IsEnd() { + if data != nil { + *data = append(*data, child.Data...) + } + return true, nil + } + + return false, nil + } + + matched := false + // check matchAll case. + var child *trieNode[T] + if root.Children.Exists('*') { + child = root.Children.Get('*') + } + if child != nil { + var err error + + // move forward in the pattern and input. + if startIdx < len(input) { + var subChild *trieNode[T] + if child.Children.Exists(input[startIdx]) { + subChild = child.Children.Get(input[startIdx]) + } + if subChild != nil { + matchedOne, err := matchHelper(subChild, input, startIdx+1, data) + if err != nil { + return false, err + } + matched = matched || matchedOne + } + } + + // stay on the '*'. + matchedAll, err := matchHelper(root, input, startIdx+1, data) + if err != nil { + return false, err + } + + matched = matched || matchedAll + } + + child = nil + if root.Children.Exists('!') { + child = root.Children.Get('!') + } + if child != nil { + matchedNegate, err := matchHelper(child, input, startIdx, nil) + if err != nil { + return false, err + } + + matched = matched || !matchedNegate + if !matchedNegate { + if data != nil { + *data = append(*data, child.Data...) + } + } + } + + var subChild *trieNode[T] + if root.Children.Exists(input[startIdx]) { + subChild = root.Children.Get(input[startIdx]) + } + if subChild != nil { + matchedOne, err := matchHelper(subChild, input, startIdx+1, data) + if err != nil { + return false, err + } + matched = matched || matchedOne + + if startIdx == len(input)-1 { + if subChild.Ch.IsEnd() { + if data != nil { + *data = append(*data, subChild.Data...) + } + matched = true + } + } + } + + return matched, nil +} + +// ParseCompositePattern extracts the options from a composite pattern. +// It appends the suffix (the segment after the '}' character) to each option. +// It does not check for other invalid chars in the pattern. Those checks should +// be done before calling this function. +func ParseCompositePattern(pattern string) ([]string, error) { + openIdx := strings.Index(pattern, "{") + closeIdx := strings.Index(pattern, "}") + + if openIdx == -1 || closeIdx == -1 { + return nil, fmt.Errorf("invalid pattern") + } + + if closeIdx != strings.LastIndex(pattern, "}") { + return nil, fmt.Errorf("invalid pattern") + } + + optionStr := pattern[openIdx+1 : closeIdx] + optionSuffix := "" + if closeIdx < len(pattern)-1 { + optionSuffix = pattern[closeIdx+1:] + } + optionPrefix := pattern[:openIdx] + + options := strings.Split(optionStr, ",") + for i := range options { + option := strings.TrimSpace(options[i]) + options[i] = optionPrefix + option + optionSuffix + } + + return options, nil +} + +// ParseCharRange extracts the options from a char range pattern. +// It does not check for other invalid chars in the pattern. Those checks should +// be done before calling this function. +func ParseCharRange(pattern string) ([]string, error) { + openIdx := strings.Index(pattern, "[") + closeIdx := strings.Index(pattern, "]") + + if openIdx == -1 || closeIdx == -1 { + return nil, fmt.Errorf("invalid pattern") + } + + if closeIdx != strings.LastIndex(pattern, "]") { + return nil, fmt.Errorf("invalid pattern") + } + + optionStr := pattern[openIdx+1 : closeIdx] + optionSuffix := "" + if closeIdx < len(pattern)-1 { + optionSuffix = pattern[closeIdx+1:] + } + optionPrefix := pattern[:openIdx] + + options := make([]string, 0) + for i := 0; i < len(optionStr); i++ { + if i+2 < len(optionStr) && optionStr[i+1] == '-' { + for ch := optionStr[i]; ch <= optionStr[i+2]; ch++ { + options = append(options, optionPrefix+string(ch)+optionSuffix) + } + i += 2 + continue + } + + options = append(options, optionPrefix+string(optionStr[i])+optionSuffix) + } + + return options, nil +} diff --git a/src/metrics/tagfiltertree/trie_char.go b/src/metrics/tagfiltertree/trie_char.go new file mode 100644 index 0000000000..dfbcf7c747 --- /dev/null +++ b/src/metrics/tagfiltertree/trie_char.go @@ -0,0 +1,27 @@ +package tagfiltertree + +type trieChar struct { + ch byte +} + +// SetEnd sets the end flag of the character. +func (c *trieChar) SetEnd() { + c.ch |= 0x01 +} + +// IsEnd returns true if the character is the end of a pattern. +func (c *trieChar) IsEnd() bool { + return c.ch&0x01 == 0x01 +} + +// newTrieChar creates a new trie character. +func newTrieChar(ch byte) trieChar { + return trieChar{ + ch: ch << 1, + } +} + +// Char returns the character. +func (c *trieChar) Char() byte { + return c.ch >> 1 +} diff --git a/src/metrics/tagfiltertree/trie_children.go b/src/metrics/tagfiltertree/trie_children.go new file mode 100644 index 0000000000..1f8fd9cd57 --- /dev/null +++ b/src/metrics/tagfiltertree/trie_children.go @@ -0,0 +1,51 @@ +package tagfiltertree + +type trieChildren[T any] struct { + Children []*T + PointerSet PointerSet +} + +func newTrieChildren[T any]() trieChildren[T] { + return trieChildren[T]{ + Children: nil, + } +} + +// Insert inserts a new child with the given byte and data. +func (tc *trieChildren[T]) Insert(ch byte, data *T) error { + if tc.PointerSet.IsSet(ch) { + // already exists. + return nil + } + + newIdx := tc.PointerSet.CountSetBitsUntil(ch) + + // make room for the new child. + tc.Children = append(tc.Children, nil) + + for i := len(tc.Children) - 1; i > newIdx; i-- { + tc.Children[i] = tc.Children[i-1] + } + + // set the new data in the correct idx. + tc.Children[newIdx] = data + + // set the idx of the new child. + tc.PointerSet.Set(ch) + + return nil +} + +func (tc *trieChildren[T]) Get(ch byte) *T { + if !tc.PointerSet.IsSet(ch) { + return nil + } + + childIdx := tc.PointerSet.CountSetBitsUntil(ch) - 1 + return tc.Children[childIdx] +} + +// Exists returns true if the child with the given byte exists. +func (tc *trieChildren[T]) Exists(ch byte) bool { + return tc.PointerSet.IsSet(ch) +} diff --git a/src/metrics/tagfiltertree/trie_children_test.go b/src/metrics/tagfiltertree/trie_children_test.go new file mode 100644 index 0000000000..ec9823fb86 --- /dev/null +++ b/src/metrics/tagfiltertree/trie_children_test.go @@ -0,0 +1,31 @@ +package tagfiltertree + +import ( + "testing" +) + +func TestTrieChildrenGet(t *testing.T) { + tc := newTrieChildren[int]() + + // Insert a child. + data := 42 + err := tc.Insert('a', &data) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + + // Get the child. + got := tc.Get('a') + if got == nil { + t.Fatalf("expected child to exist") + } + if *got != data { + t.Fatalf("unexpected data: got %v, want %v", *got, data) + } + + // Get a non-existing child. + got = tc.Get('b') + if got != nil { + t.Fatalf("unexpected child: got %v, want nil", *got) + } +} diff --git a/src/metrics/tagfiltertree/trie_test.go b/src/metrics/tagfiltertree/trie_test.go new file mode 100644 index 0000000000..763e09e710 --- /dev/null +++ b/src/metrics/tagfiltertree/trie_test.go @@ -0,0 +1,418 @@ +package tagfiltertree + +import ( + "reflect" + "sort" + "testing" + + "github.com/stretchr/testify/require" +) + +type Pattern struct { + pattern string + data string +} + +func TestTrieMatch(t *testing.T) { + tests := []struct { + patterns []Pattern + expectedData []string + name string + input string + expectedMatch bool + isDisabled bool + }{ + { + name: "suffix wildcard minimum match", + patterns: []Pattern{ + { + pattern: "foo*", + data: "data1", + }, + }, + input: "foo", + expectedMatch: true, + expectedData: []string{"data1"}, + }, + { + name: "suffix wildcard match", + patterns: []Pattern{ + { + pattern: "foo*", + data: "data1", + }, + }, + input: "foo123", + expectedMatch: true, + expectedData: []string{"data1"}, + }, + { + name: "suffix wildcard suffix match", + patterns: []Pattern{ + { + pattern: "foo*", + data: "data1", + }, + }, + input: "foofoofoofoo", + expectedMatch: true, + expectedData: []string{"data1"}, + }, + { + name: "suffix wildcard no match", + patterns: []Pattern{ + { + pattern: "foo*", + data: "data1", + }, + }, + input: "fobar", + expectedMatch: false, + expectedData: []string{}, + }, + { + name: "prefix wildcard minimum match", + patterns: []Pattern{ + { + pattern: "*foo", + data: "data1", + }, + }, + input: "foo", + expectedMatch: true, + expectedData: []string{"data1"}, + }, + { + name: "prefix wildcard prefix match", + patterns: []Pattern{ + { + pattern: "*foo", + data: "data1", + }, + }, + input: "fofofofoo", + expectedMatch: true, + expectedData: []string{"data1"}, + }, + { + name: "prefix wildcard no match", + patterns: []Pattern{ + { + pattern: "*foo", + data: "data1", + }, + }, + input: "barfobo", + expectedMatch: false, + expectedData: []string{}, + }, + { + name: "single exact match", + patterns: []Pattern{ + { + pattern: "foobar", + data: "data1", + }, + }, + input: "foobar", + expectedMatch: true, + expectedData: []string{"data1"}, + }, + { + name: "single suffix and prefix wildcard match", + patterns: []Pattern{ + { + pattern: "*foo*", + data: "data1", + }, + }, + input: "barfoobar", + expectedMatch: true, + expectedData: []string{"data1"}, + }, + { + name: "single negation match", + patterns: []Pattern{ + { + pattern: "!foo", + data: "data1", + }, + }, + input: "bar", + expectedMatch: true, + expectedData: []string{"data1"}, + }, + { + name: "single negation with wildcard match", + patterns: []Pattern{ + { + pattern: "!*foo*", + data: "data1", + }, + }, + input: "fofobar", + expectedMatch: true, + expectedData: []string{"data1"}, + }, + { + name: "composite, absolute match, single match", + patterns: []Pattern{ + { + pattern: "{foo,bar}", + data: "data1", + }, + }, + input: "bar", + expectedMatch: true, + expectedData: []string{"data1"}, + }, + { + name: "composite starting middle, wildcards, single match", + patterns: []Pattern{ + { + pattern: "*choco{*foo*,*bar*}late*", + data: "data1", + }, + }, + input: "123chocobarlate456", + expectedMatch: true, + expectedData: []string{"data1"}, + }, + { + name: "char range, wildcards, single match", + patterns: []Pattern{ + { + pattern: "[a-d]*", + data: "data1", + }, + }, + input: "chocolate", + expectedMatch: true, + expectedData: []string{"data1"}, + }, + { + name: "composite, wildcard match, single match", + patterns: []Pattern{ + { + pattern: "{*foo*,*bar*}", + data: "data1", + }, + }, + input: "bar", + expectedMatch: true, + expectedData: []string{"data1"}, + }, + { + name: "single negative composite, wildcards, no match", + patterns: []Pattern{ + { + pattern: "!{*foo*,*bar*}", + data: "data1", + }, + }, + input: "bar", + expectedMatch: false, + expectedData: []string{}, + }, + { + name: "composite, char range, wildcards, single match", + patterns: []Pattern{ + { + pattern: "{abc, [1-4]*, *foo*}", + data: "data1", + }, + }, + input: "30000000", + expectedMatch: true, + expectedData: []string{"data1"}, + }, + { + name: "char range without dash, wildcards, no match", + patterns: []Pattern{ + { + pattern: "[14]*", + data: "data1", + }, + }, + input: "30000000", + expectedMatch: false, + expectedData: []string{}, + }, + { + name: "char range without dash, wildcards, match", + patterns: []Pattern{ + { + pattern: "[14]*", + data: "data1", + }, + }, + input: "40000000", + expectedMatch: true, + expectedData: []string{"data1"}, + }, + { + name: "multiple composite, wildcard and absolute match, single match", + patterns: []Pattern{ + { + pattern: "{*foo*,*bal*,batbaz}", + data: "data1", + }, + { + pattern: "*batbaz", + data: "data2", + }, + }, + input: "batbaz", + expectedMatch: true, + expectedData: []string{"data1", "data2"}, + }, + { + name: "multiple intersecting composite, wildcard and absolute match, all match", + patterns: []Pattern{ + { + pattern: "!{*foo*,*bal*}", + data: "data1", + }, + { + pattern: "!{*bal*,gaz*}", + data: "data2", + }, + }, + input: "batbaz", + expectedMatch: true, + expectedData: []string{"data1", "data2"}, + // disable test since we don't support multiple negative composite filters at the moment. + isDisabled: true, + }, + { + name: "multiple intersecting composite, wildcard and absolute match, single match", + patterns: []Pattern{ + { + pattern: "!{*foo*,*bal*}", + data: "data1", + }, + { + pattern: "!{*bal*,gaz*}", + data: "data2", + }, + }, + input: "foo", + expectedMatch: true, + expectedData: []string{"data2"}, + // disable test since we don't support multiple negative composite filters at the moment. + isDisabled: true, + }, + { + name: "multiple intersecting composite, wildcard and absolute match, no match", + patterns: []Pattern{ + { + pattern: "!{*foo*,*bal*}", + data: "data1", + }, + { + pattern: "!{*bal*,gaz*}", + data: "data2", + }, + }, + input: "banbalabal", + expectedMatch: false, + expectedData: []string{}, + // disable test since we don't support multiple negative composite filters at the moment. + isDisabled: true, + }, + { + name: "multiple negations with wildcard, single match", + patterns: []Pattern{ + { + pattern: "!*foo*", + data: "data1", + }, + { + pattern: "!*bar*", + data: "data2", + }, + { + pattern: "!*baz*", + data: "data3", + }, + }, + input: "fofoobar", + expectedMatch: true, + expectedData: []string{"data3"}, + // disable test since we don't support multiple negative composite filters at the moment. + isDisabled: true, + }, + } + + for _, tt := range tests { + runTest := func(withData bool) { + if tt.isDisabled { + return + } + trie := NewTrie[string]() + for _, rule := range tt.patterns { + var data *string + if withData { + data = &rule.data + } + + err := trie.Insert(rule.pattern, data) + require.NoError(t, err) + } + + var data []string + if withData { + data = make([]string, 0) + } + matched, err := trie.Match(tt.input, &data) + require.NoError(t, err) + + // check match. + require.Equal(t, tt.expectedMatch, matched) + + if !withData { + for i := range data { + require.Empty(t, data[i]) + } + return + } + + if len(tt.expectedData) == 0 { + require.Empty(t, data) + return + } + + // dedup data. + data = deduplicateData(data) + + // check data. + sort.Slice(data, func(i, j int) bool { + return data[i] < data[j] + }) + sort.Slice(tt.expectedData, func(i, j int) bool { + return tt.expectedData[i] < tt.expectedData[j] + }) + + require.Equal(t, true, reflect.DeepEqual(data, tt.expectedData)) + } + + t.Run(tt.name+"_with_data", func(t *testing.T) { + runTest(true) + }) + t.Run(tt.name+"_without_data", func(t *testing.T) { + runTest(false) + }) + } +} + +func deduplicateData(data []string) []string { + dataMap := make(map[string]struct{}) + for i := range data { + dataMap[data[i]] = struct{}{} + } + data = make([]string, 0, len(dataMap)) + for k := range dataMap { + data = append(data, k) + } + return data +}