Skip to content

Commit

Permalink
Merge pull request #15 from MikeYast/fast_masker
Browse files Browse the repository at this point in the history
Optimize log Masker
  • Loading branch information
MikeYast authored Oct 25, 2024
2 parents d4c7a80 + 1834ee6 commit 7bf23f5
Show file tree
Hide file tree
Showing 5 changed files with 799 additions and 24 deletions.
1 change: 1 addition & 0 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ require (
code.cloudfoundry.org/bytefmt v0.0.0-20240808182453-a379845013d9
github.com/RussellLuo/slidingwindow v0.0.0-20200528002341-535bb99d338b
github.com/cenkalti/backoff/v4 v4.3.0
github.com/cloudflare/ahocorasick v0.0.0-20240916140611-054963ec9396
github.com/go-chi/chi/v5 v5.1.0
github.com/mitchellh/mapstructure v1.5.0
github.com/prometheus/client_golang v1.19.1
Expand Down
2 changes: 2 additions & 0 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,8 @@ github.com/cenkalti/backoff/v4 v4.3.0/go.mod h1:Y3VNntkOUPxTVeUxJ/G5vcM//AlwfmyY
github.com/cespare/xxhash/v2 v2.1.1/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs=
github.com/cespare/xxhash/v2 v2.2.0 h1:DC2CZ1Ep5Y4k3ZQ899DldepgrayRUGE6BBZ/cd9Cj44=
github.com/cespare/xxhash/v2 v2.2.0/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs=
github.com/cloudflare/ahocorasick v0.0.0-20240916140611-054963ec9396 h1:W2HK1IdCnCGuLUeyizSCkwvBjdj0ZL7mxnJYQ3poyzI=
github.com/cloudflare/ahocorasick v0.0.0-20240916140611-054963ec9396/go.mod h1:tGWUZLZp9ajsxUOnHmFFLnqnlKXsCn6GReG4jAD59H0=
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc h1:U9qPSI2PIWSS1VwoXQT9A3Wy9MM3WgvqSxFWenqJduM=
Expand Down
2 changes: 1 addition & 1 deletion log/logger_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -258,5 +258,5 @@ func TestLoggerWithMasking(t *testing.T) {
},
},
}
require.Equal(t, expectedMasks, masker.FieldMasks)
require.Equal(t, expectedMasks, masker.fieldMasks)
}
116 changes: 107 additions & 9 deletions log/masker.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,9 @@ package log
import (
"regexp"
"strings"
"unsafe"

"github.com/cloudflare/ahocorasick"
)

// Mask is used to mask a secret in strings.
Expand All @@ -24,7 +27,9 @@ type FieldMasker struct {
const ignoreCase = `(?i)`

func NewFieldMasker(cfg MaskingRuleConfig) FieldMasker {
fMask := FieldMasker{Field: strings.ToLower(cfg.Field), Masks: make([]Mask, 0, len(cfg.Masks))}
fMask := FieldMasker{Field: strings.ToLower(cfg.Field),
Masks: make([]Mask, 0, len(cfg.Masks)),
}

for _, repCfg := range cfg.Masks {
fMask.Masks = append(fMask.Masks, NewMask(repCfg))
Expand All @@ -42,23 +47,116 @@ func NewFieldMasker(cfg MaskingRuleConfig) FieldMasker {
return fMask
}

// Masker is used to mask various secrets in strings.
// Masker uses the Aho-Corasick algorithm to simultaneously search for all patterns in a string.
// The order of applying masking rules is guaranteed.
type Masker struct {
FieldMasks []FieldMasker
fieldMasks []FieldMasker
aho *ahocorasick.Matcher
maskFunc func(s string) string
matchToRuleIndex []int // maps Aho-Corasick match indices to rule indices
ruleToMatchIdx []int // maps field mask indices to Aho-Corasick match indices
}

// maxRulesForOpt is the maximum number of field patterns that the uint64 bitmask version of Mask (maskFor64Fields) can track.
const maxRulesForOpt = 64

// NewMasker creates a new Masker instance.
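// The two index mappings are filled in here without memory barriers: they are written only
// during construction, so as long as the returned Masker is handed to other goroutines with a
// "happens-before" guarantee, Mask() can read them without locking.
// Otherwise, we would need to use redundant mutex operations in Mask().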
func NewMasker(rules []MaskingRuleConfig) *Masker {
r := &Masker{FieldMasks: make([]FieldMasker, 0, len(rules))}
for _, rule := range rules {
r.FieldMasks = append(r.FieldMasks, NewFieldMasker(rule))
r := &Masker{
fieldMasks: make([]FieldMasker, 0, len(rules)),
matchToRuleIndex: make([]int, 0),
ruleToMatchIdx: make([]int, len(rules)),
}

patterns := make([]string, 0, len(rules))
for idx, rule := range rules {
masker := NewFieldMasker(rule)
r.fieldMasks = append(r.fieldMasks, masker)
if masker.Field == "" {
// assign -1 to indicate that no specific match is needed (masks with an empty field apply universally)
r.ruleToMatchIdx[idx] = -1
} else {
patterns = append(patterns, strings.ToLower(rule.Field))
// map the current rule index to the next available match index
r.matchToRuleIndex = append(r.matchToRuleIndex, idx)
// store the mapping from the rule index to the corresponding match index
r.ruleToMatchIdx[idx] = len(r.matchToRuleIndex) - 1
}
}
r.aho = ahocorasick.NewStringMatcher(patterns)

if len(patterns) <= maxRulesForOpt {
r.maskFunc = r.maskFor64Fields
} else {
r.maskFunc = r.mask
}

return r
}

// stringToBytes returns a byte slice that shares the memory of s, avoiding the
// copy that []byte(s) would make.
//
// The returned slice has len == cap == len(s). It MUST be treated as read-only:
// strings are immutable, and writing through the slice is undefined behavior.
// An empty string yields a nil slice.
func stringToBytes(s string) []byte {
	if s == "" {
		return nil
	}
	// Build the slice from the string's data pointer and length. Reinterpreting
	// the string header as a slice header would leave the capacity field
	// pointing at unrelated memory.
	return unsafe.Slice(unsafe.StringData(s), len(s)) // nolint: gosec // reduce number of allocations
}

// Mask applies the appropriate masking function to the input string.
func (r *Masker) Mask(s string) string {
lower := strings.ToLower(s)
for _, fieldMask := range r.FieldMasks {
if strings.Contains(lower, fieldMask.Field) {
return r.maskFunc(s)
}

// mask is the general masking path, selected by NewMasker when there are more
// field patterns than maxRulesForOpt.
//
// It lowercases s, runs the Aho-Corasick matcher once to find which field
// patterns occur in it, and then applies the masks of every applicable rule in
// rule order. A rule whose match index is -1 (empty field) is always applied.
// The possibly rewritten string is returned.
func (r *Masker) mask(s string) string {
	lower := stringToBytes(strings.ToLower(s))
	matches := r.aho.MatchThreadSafe(lower)

	// matched is indexed by Aho-Corasick match (pattern) index, the same index
	// space that ruleToMatchIdx yields below. Indexing it by rule index would
	// mark the wrong entry, or go out of range, whenever an empty-field rule
	// makes rule indices and match indices diverge.
	matched := make([]bool, len(r.matchToRuleIndex))
	for _, matchIdx := range matches {
		matched[matchIdx] = true
	}

	for i, fieldMask := range r.fieldMasks {
		matchIdx := r.ruleToMatchIdx[i]
		// Skip rules that have a field pattern which did not occur in s.
		if matchIdx != -1 && !matched[matchIdx] {
			continue
		}

		for _, rep := range fieldMask.Masks {
			s = rep.RegExp.ReplaceAllString(s, rep.Mask)
		}
	}

	return s
}

// maskFor64Fields is an optimized version of mask for 64 fields max.
func (r *Masker) maskFor64Fields(s string) string {
lower := stringToBytes(strings.ToLower(s))
matches := r.aho.MatchThreadSafe(lower)

var matchedMask uint64 // 64 fields max
for _, idx := range matches {
matchedMask |= 1 << idx
}

for i, fieldMask := range r.fieldMasks {
matchIdx := r.ruleToMatchIdx[i]
apply := false
if matchIdx == -1 {
apply = true
} else {
apply = (matchedMask & (1 << matchIdx)) != 0
}

if apply {
for _, rep := range fieldMask.Masks {
s = rep.RegExp.ReplaceAllString(s, rep.Mask)
}
Expand Down
Loading

0 comments on commit 7bf23f5

Please sign in to comment.