Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Optimize log Masker #15

Merged
merged 1 commit into from
Oct 25, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ require (
code.cloudfoundry.org/bytefmt v0.0.0-20240808182453-a379845013d9
github.com/RussellLuo/slidingwindow v0.0.0-20200528002341-535bb99d338b
github.com/cenkalti/backoff/v4 v4.3.0
github.com/cloudflare/ahocorasick v0.0.0-20240916140611-054963ec9396
github.com/go-chi/chi/v5 v5.1.0
github.com/mitchellh/mapstructure v1.5.0
github.com/prometheus/client_golang v1.19.1
Expand Down
2 changes: 2 additions & 0 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,8 @@ github.com/cenkalti/backoff/v4 v4.3.0/go.mod h1:Y3VNntkOUPxTVeUxJ/G5vcM//AlwfmyY
github.com/cespare/xxhash/v2 v2.1.1/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs=
github.com/cespare/xxhash/v2 v2.2.0 h1:DC2CZ1Ep5Y4k3ZQ899DldepgrayRUGE6BBZ/cd9Cj44=
github.com/cespare/xxhash/v2 v2.2.0/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs=
github.com/cloudflare/ahocorasick v0.0.0-20240916140611-054963ec9396 h1:W2HK1IdCnCGuLUeyizSCkwvBjdj0ZL7mxnJYQ3poyzI=
github.com/cloudflare/ahocorasick v0.0.0-20240916140611-054963ec9396/go.mod h1:tGWUZLZp9ajsxUOnHmFFLnqnlKXsCn6GReG4jAD59H0=
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc h1:U9qPSI2PIWSS1VwoXQT9A3Wy9MM3WgvqSxFWenqJduM=
Expand Down
2 changes: 1 addition & 1 deletion log/logger_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -258,5 +258,5 @@ func TestLoggerWithMasking(t *testing.T) {
},
},
}
require.Equal(t, expectedMasks, masker.FieldMasks)
require.Equal(t, expectedMasks, masker.fieldMasks)
}
116 changes: 107 additions & 9 deletions log/masker.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,9 @@ package log
import (
"regexp"
"strings"
"unsafe"

"github.com/cloudflare/ahocorasick"
)

// Mask is used to mask a secret in strings.
Expand All @@ -24,7 +27,9 @@ type FieldMasker struct {
// ignoreCase is the regular-expression flag group that switches on
// case-insensitive matching.
// NOTE(review): presumably prepended to mask patterns before compilation; the
// place where it is used is not visible in this excerpt — confirm.
const ignoreCase = `(?i)`

func NewFieldMasker(cfg MaskingRuleConfig) FieldMasker {
fMask := FieldMasker{Field: strings.ToLower(cfg.Field), Masks: make([]Mask, 0, len(cfg.Masks))}
fMask := FieldMasker{Field: strings.ToLower(cfg.Field),
Masks: make([]Mask, 0, len(cfg.Masks)),
}

for _, repCfg := range cfg.Masks {
fMask.Masks = append(fMask.Masks, NewMask(repCfg))
Expand All @@ -42,23 +47,116 @@ func NewFieldMasker(cfg MaskingRuleConfig) FieldMasker {
return fMask
}

// Masker is used to mask various secrets in strings.
// It uses the Aho-Corasick algorithm to search a string for all field patterns
// in a single pass, and then applies the masks of the rules whose field was
// found (rules with an empty field are always applied).
// Rules are applied in the order in which they were passed to NewMasker.
type Masker struct {
// NOTE(review): both FieldMasks and fieldMasks appear in this listing; mask and
// maskFor64Fields iterate fieldMasks only — confirm whether this exported
// duplicate is still required.
FieldMasks []FieldMasker
// fieldMasks holds one FieldMasker per configured rule, in rule order.
fieldMasks []FieldMasker
// aho matches the lower-cased field names of all rules with a non-empty field.
aho *ahocorasick.Matcher
// maskFunc is the masking strategy picked by NewMasker: maskFor64Fields when
// the pattern count is at most maxRulesForOpt, mask otherwise.
maskFunc func(s string) string
matchToRuleIndex []int // maps Aho-Corasick match indices to rule indices
ruleToMatchIdx []int // maps field mask indices to Aho-Corasick match indices; -1 marks a rule with an empty field
}

// maxRulesForOpt is the largest number of Aho-Corasick patterns for which
// NewMasker selects maskFor64Fields. That variant records matches in a single
// uint64, one bit per pattern, hence the limit of 64. With more patterns
// NewMasker falls back to mask.
const maxRulesForOpt = 64

// NewMasker creates a new Masker instance from the given masking rules.
//
// For every rule it builds a FieldMasker and, when the rule has a non-empty
// field, registers the lower-cased field name as an Aho-Corasick pattern. Two
// index mappings tie patterns and rules together: matchToRuleIndex (pattern
// index -> rule index) and ruleToMatchIdx (rule index -> pattern index, or -1
// for rules with an empty field).
//
// The mappings are filled in here without memory barriers. Per the original
// author's note this relies on construction happening-before any call to
// Mask; otherwise Mask would need (otherwise redundant) mutex operations.
func NewMasker(rules []MaskingRuleConfig) *Masker {
// NOTE(review): the next three lines and the composite literal that follows
// both initialise r and do not form balanced Go as shown; they look like two
// alternative versions of the same setup — confirm which one is current.
r := &Masker{FieldMasks: make([]FieldMasker, 0, len(rules))}
for _, rule := range rules {
r.FieldMasks = append(r.FieldMasks, NewFieldMasker(rule))
r := &Masker{
fieldMasks: make([]FieldMasker, 0, len(rules)),
matchToRuleIndex: make([]int, 0),
ruleToMatchIdx: make([]int, len(rules)),
}

// patterns collects the field names handed to the Aho-Corasick matcher; its
// index space is the "match index" used by the two mappings.
patterns := make([]string, 0, len(rules))
for idx, rule := range rules {
masker := NewFieldMasker(rule)
r.fieldMasks = append(r.fieldMasks, masker)
if masker.Field == "" {
// assign -1 to indicate that no specific match is needed (empty-field masks apply universally)
r.ruleToMatchIdx[idx] = -1
} else {
patterns = append(patterns, strings.ToLower(rule.Field))
// map the current rule index to the next available match index
r.matchToRuleIndex = append(r.matchToRuleIndex, idx)
// store the mapping from the rule index to the corresponding match index
r.ruleToMatchIdx[idx] = len(r.matchToRuleIndex) - 1
}
}
r.aho = ahocorasick.NewStringMatcher(patterns)

// Pick the masking strategy once: the uint64-bitmask variant only works while
// every pattern index fits into 64 bits.
if len(patterns) <= maxRulesForOpt {
r.maskFunc = r.maskFor64Fields
} else {
r.maskFunc = r.mask
}

return r
}

// stringToBytes returns a []byte view of s that shares the string's memory
// instead of copying it, to avoid an allocation on the hot masking path.
//
// The returned slice has len == cap == len(s). It MUST be treated as
// read-only: strings are immutable, and writing through the slice is
// undefined behaviour. An empty string yields a nil slice.
//
// The previous implementation reinterpreted the two-word string header as a
// three-word slice header, so the slice capacity was read from whatever memory
// happened to follow the string header.
func stringToBytes(s string) []byte {
	if s == "" {
		return nil
	}
	return unsafe.Slice(unsafe.StringData(s), len(s)) // nolint: gosec // reduce number of allocations
}

// Mask applies the appropriate masking function to the input string and
// returns the result. The actual work is delegated to maskFunc, the strategy
// selected in NewMasker.
func (r *Masker) Mask(s string) string {
// NOTE(review): the three lines below scan FieldMasks with strings.Contains,
// while the return delegates to maskFunc; the braces do not balance as shown.
// They look like remnants of an earlier implementation — confirm.
lower := strings.ToLower(s)
for _, fieldMask := range r.FieldMasks {
if strings.Contains(lower, fieldMask.Field) {
return r.maskFunc(s)
}

// mask is the general masking path, selected by NewMasker when there are more
// than maxRulesForOpt field patterns.
//
// It lower-cases s, runs the Aho-Corasick matcher once to find which field
// patterns occur in it, and then walks the rules in their configured order,
// applying every mask of a rule whose field was found. Rules with an empty
// field (ruleToMatchIdx == -1) are always applied. The masked string is
// returned; s itself is not modified.
func (r *Masker) mask(s string) string {
	lower := stringToBytes(strings.ToLower(s))
	matches := r.aho.MatchThreadSafe(lower)

	// matched is indexed by Aho-Corasick match (pattern) index, the same index
	// space ruleToMatchIdx yields below. Writing it by rule index, as before,
	// marked the wrong slot, or panicked with an out-of-range index, whenever
	// an empty-field rule preceded a named one.
	matched := make([]bool, len(r.matchToRuleIndex))
	for _, matchIdx := range matches {
		matched[matchIdx] = true
	}

	for i, fieldMask := range r.fieldMasks {
		// Skip rules bound to a field name that does not occur in s.
		if matchIdx := r.ruleToMatchIdx[i]; matchIdx != -1 && !matched[matchIdx] {
			continue
		}
		for _, rep := range fieldMask.Masks {
			s = rep.RegExp.ReplaceAllString(s, rep.Mask)
		}
	}

	return s
}

// maskFor64Fields is an optimized version of mask for at most 64 field patterns.
func (r *Masker) maskFor64Fields(s string) string {
lower := stringToBytes(strings.ToLower(s))
matches := r.aho.MatchThreadSafe(lower)

var matchedMask uint64 // 64 fields max
for _, idx := range matches {
matchedMask |= 1 << idx
}

for i, fieldMask := range r.fieldMasks {
matchIdx := r.ruleToMatchIdx[i]
apply := false
if matchIdx == -1 {
apply = true
} else {
apply = (matchedMask & (1 << matchIdx)) != 0
}

if apply {
for _, rep := range fieldMask.Masks {
s = rep.RegExp.ReplaceAllString(s, rep.Mask)
}
Expand Down
Loading
Loading