diff --git a/go.mod b/go.mod index c9c52ea..0b0eb90 100644 --- a/go.mod +++ b/go.mod @@ -6,6 +6,7 @@ require ( code.cloudfoundry.org/bytefmt v0.0.0-20240808182453-a379845013d9 github.com/RussellLuo/slidingwindow v0.0.0-20200528002341-535bb99d338b github.com/cenkalti/backoff/v4 v4.3.0 + github.com/cloudflare/ahocorasick v0.0.0-20240916140611-054963ec9396 github.com/go-chi/chi/v5 v5.1.0 github.com/mitchellh/mapstructure v1.5.0 github.com/prometheus/client_golang v1.19.1 diff --git a/go.sum b/go.sum index 44d4f20..fd136dd 100644 --- a/go.sum +++ b/go.sum @@ -11,6 +11,8 @@ github.com/cenkalti/backoff/v4 v4.3.0/go.mod h1:Y3VNntkOUPxTVeUxJ/G5vcM//AlwfmyY github.com/cespare/xxhash/v2 v2.1.1/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs= github.com/cespare/xxhash/v2 v2.2.0 h1:DC2CZ1Ep5Y4k3ZQ899DldepgrayRUGE6BBZ/cd9Cj44= github.com/cespare/xxhash/v2 v2.2.0/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs= +github.com/cloudflare/ahocorasick v0.0.0-20240916140611-054963ec9396 h1:W2HK1IdCnCGuLUeyizSCkwvBjdj0ZL7mxnJYQ3poyzI= +github.com/cloudflare/ahocorasick v0.0.0-20240916140611-054963ec9396/go.mod h1:tGWUZLZp9ajsxUOnHmFFLnqnlKXsCn6GReG4jAD59H0= github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc h1:U9qPSI2PIWSS1VwoXQT9A3Wy9MM3WgvqSxFWenqJduM= diff --git a/log/logger_test.go b/log/logger_test.go index 6a70410..ae8876b 100644 --- a/log/logger_test.go +++ b/log/logger_test.go @@ -258,5 +258,5 @@ func TestLoggerWithMasking(t *testing.T) { }, }, } - require.Equal(t, expectedMasks, masker.FieldMasks) + require.Equal(t, expectedMasks, masker.fieldMasks) } diff --git a/log/masker.go b/log/masker.go index 0dd825a..9013118 100644 --- a/log/masker.go +++ b/log/masker.go @@ -3,6 +3,9 @@ package log import ( "regexp" "strings" + "unsafe" + + "github.com/cloudflare/ahocorasick" ) // Mask is used to mask a secret in strings. @@ -24,7 +27,9 @@ type FieldMasker struct { const ignoreCase = `(?i)` func NewFieldMasker(cfg MaskingRuleConfig) FieldMasker { - fMask := FieldMasker{Field: strings.ToLower(cfg.Field), Masks: make([]Mask, 0, len(cfg.Masks))} + fMask := FieldMasker{Field: strings.ToLower(cfg.Field), + Masks: make([]Mask, 0, len(cfg.Masks)), + } for _, repCfg := range cfg.Masks { fMask.Masks = append(fMask.Masks, NewMask(repCfg)) @@ -42,23 +47,116 @@ func NewFieldMasker(cfg MaskingRuleConfig) FieldMasker { return fMask } -// Masker is used to mask various secrets in strings. +// Masker uses the Aho-Corasick algorithm to simultaneously search for all patterns in a string. +// The order of applying masking rules is guaranteed. type Masker struct { - FieldMasks []FieldMasker + fieldMasks []FieldMasker + aho *ahocorasick.Matcher + maskFunc func(s string) string + matchToRuleIndex []int // maps Aho-Corasick match indices to rule indices + ruleToMatchIdx []int // maps field mask indices to Aho-Corasick match indices } +// limitation for Aho-Corasick algorithm version of Mask function +const maxRulesForOpt = 64 + +// NewMasker creates a new Masker instance. +// This function initializes two mappings without memory barriers because we are confident +// that the function has "happens-before" guarantee. +// Otherwise, we would need to use redundant mutex operations in Mask(). func NewMasker(rules []MaskingRuleConfig) *Masker { - r := &Masker{FieldMasks: make([]FieldMasker, 0, len(rules))} - for _, rule := range rules { - r.FieldMasks = append(r.FieldMasks, NewFieldMasker(rule)) + r := &Masker{ + fieldMasks: make([]FieldMasker, 0, len(rules)), + matchToRuleIndex: make([]int, 0), + ruleToMatchIdx: make([]int, len(rules)), + } + + patterns := make([]string, 0, len(rules)) + for idx, rule := range rules { + masker := NewFieldMasker(rule) + r.fieldMasks = append(r.fieldMasks, masker) + if masker.Field == "" { + // assign -1 to indicate that no specific match is needed (empty fields masks apply universally) + r.ruleToMatchIdx[idx] = -1 + } else { + patterns = append(patterns, strings.ToLower(rule.Field)) + // map the current rule index to the next available match index + r.matchToRuleIndex = append(r.matchToRuleIndex, idx) + // store the mapping from the rule index to the corresponding match index + r.ruleToMatchIdx[idx] = len(r.matchToRuleIndex) - 1 + } + } + r.aho = ahocorasick.NewStringMatcher(patterns) + + if len(patterns) <= maxRulesForOpt { + r.maskFunc = r.maskFor64Fields + } else { + r.maskFunc = r.mask } + return r } +func stringToBytes(s string) []byte { + return *(*[]byte)(unsafe.Pointer(&s)) // nolint: gosec // reduce number of allocations +} + +// Mask applies the appropriate masking function to the input string. func (r *Masker) Mask(s string) string { - lower := strings.ToLower(s) - for _, fieldMask := range r.FieldMasks { - if strings.Contains(lower, fieldMask.Field) { + return r.maskFunc(s) +} + +func (r *Masker) mask(s string) string { + lower := stringToBytes(strings.ToLower(s)) + matches := r.aho.MatchThreadSafe(lower) + + matched := make([]bool, len(r.matchToRuleIndex)) + for _, matchIdx := range matches { + // for each match index from Aho-Corasick, find the corresponding rule index + ruleIdx := r.matchToRuleIndex[matchIdx] + matched[ruleIdx] = true + } + + for i, fieldMask := range r.fieldMasks { + matchIdx := r.ruleToMatchIdx[i] + apply := false + if matchIdx == -1 { + apply = true + } else { + // get the index in matchToRuleIndex corresponding to this field + apply = matched[matchIdx] + } + + if apply { + for _, rep := range fieldMask.Masks { + s = rep.RegExp.ReplaceAllString(s, rep.Mask) + } + } + } + + return s +} + +// maskFor64Fields is a optimized version of mask for 64 fields max. +func (r *Masker) maskFor64Fields(s string) string { + lower := stringToBytes(strings.ToLower(s)) + matches := r.aho.MatchThreadSafe(lower) + + var matchedMask uint64 // 64 fields max + for _, idx := range matches { + matchedMask |= 1 << idx + } + + for i, fieldMask := range r.fieldMasks { + matchIdx := r.ruleToMatchIdx[i] + apply := false + if matchIdx == -1 { + apply = true + } else { + apply = (matchedMask & (1 << matchIdx)) != 0 + } + + if apply { for _, rep := range fieldMask.Masks { s = rep.RegExp.ReplaceAllString(s, rep.Mask) } diff --git a/log/masker_test.go b/log/masker_test.go index 9185782..0c5bd09 100644 --- a/log/masker_test.go +++ b/log/masker_test.go @@ -2,6 +2,8 @@ package log import ( + "math/rand" + "strings" "testing" "github.com/stretchr/testify/require" @@ -11,29 +13,32 @@ func TestMasker(t *testing.T) { replAToB := MaskingRuleConfig{Masks: []MaskConfig{{`A`, `B`}}} replBToA := MaskingRuleConfig{Masks: []MaskConfig{{`B`, `A`}}} cases := []struct { - masker *Masker - input string - expected string + ruleConfig []MaskingRuleConfig + input string + expected string }{ { - NewMasker([]MaskingRuleConfig{replAToB}), + []MaskingRuleConfig{replAToB}, "ABA", "BBB", }, { - NewMasker([]MaskingRuleConfig{replAToB, replBToA}), + []MaskingRuleConfig{replAToB, replBToA}, "ABA", "AAA", }, { - NewMasker([]MaskingRuleConfig{replBToA, replAToB}), + []MaskingRuleConfig{replBToA, replAToB}, "ABA", "BBB", }, } for _, c := range cases { - out := c.masker.Mask(c.input) - require.Equal(t, c.expected, out) + t.Run(c.input, func(t *testing.T) { + m := NewMasker(c.ruleConfig) + out := m.Mask(c.input) + require.Equal(t, c.expected, out) + }) } } @@ -172,21 +177,625 @@ func TestDefaultMasks(t *testing.T) { } } -func BenchmarkMasker(b *testing.B) { - r := NewMasker(DefaultMasks) +var testMMasks = []MaskingRuleConfig{ + { + Field: "Authorization", + Formats: []FieldMaskFormat{FieldMaskFormatHTTPHeader}, + }, + { + Field: "authorization", + Formats: []FieldMaskFormat{FieldMaskFormatHTTPHeader}, + }, + { + Field: "Password", + Formats: []FieldMaskFormat{FieldMaskFormatJSON, FieldMaskFormatURLEncoded}, + }, + { + Field: "password", + Formats: []FieldMaskFormat{FieldMaskFormatJSON, FieldMaskFormatURLEncoded}, + }, + { + Field: "ClientSecret", + Formats: []FieldMaskFormat{FieldMaskFormatJSON, FieldMaskFormatURLEncoded}, + }, + { + Field: "client_secret", + Formats: []FieldMaskFormat{FieldMaskFormatJSON, FieldMaskFormatURLEncoded}, + }, + { + Field: "AccessToken", + Formats: []FieldMaskFormat{FieldMaskFormatJSON, FieldMaskFormatURLEncoded}, + }, + { + Field: "access_token", + Formats: []FieldMaskFormat{FieldMaskFormatJSON, FieldMaskFormatURLEncoded}, + }, + { + Field: "RefreshToken", + Formats: []FieldMaskFormat{FieldMaskFormatJSON, FieldMaskFormatURLEncoded}, + }, + { + Field: "refresh_token", + Formats: []FieldMaskFormat{FieldMaskFormatJSON, FieldMaskFormatURLEncoded}, + }, + { + Field: "IdToken", + Formats: []FieldMaskFormat{FieldMaskFormatJSON, FieldMaskFormatURLEncoded}, + }, + { + Field: "id_token", + Formats: []FieldMaskFormat{FieldMaskFormatJSON, FieldMaskFormatURLEncoded}, + }, + { + Field: "assertion", + Formats: []FieldMaskFormat{FieldMaskFormatJSON, FieldMaskFormatURLEncoded}, + }, + { + Field: "Pwd", + Formats: []FieldMaskFormat{FieldMaskFormatJSON, FieldMaskFormatURLEncoded}, + }, + { + Field: "pwd", + Formats: []FieldMaskFormat{FieldMaskFormatJSON, FieldMaskFormatURLEncoded}, + }, + { + Field: "Salt", + Formats: []FieldMaskFormat{FieldMaskFormatJSON, FieldMaskFormatURLEncoded}, + }, + { + Field: "salt", + Formats: []FieldMaskFormat{FieldMaskFormatJSON, FieldMaskFormatURLEncoded}, + }, + { + Field: "Tenant", + Formats: []FieldMaskFormat{FieldMaskFormatJSON, FieldMaskFormatURLEncoded}, + }, + { + Field: "tenant", + Formats: []FieldMaskFormat{FieldMaskFormatJSON, FieldMaskFormatURLEncoded}, + }, + { + Field: "Cookie", + Formats: []FieldMaskFormat{FieldMaskFormatJSON, FieldMaskFormatURLEncoded}, + }, + { + Field: "cookie", + Formats: []FieldMaskFormat{FieldMaskFormatJSON, FieldMaskFormatURLEncoded}, + }, + { + Field: "ApiKey", + Formats: []FieldMaskFormat{FieldMaskFormatJSON, FieldMaskFormatURLEncoded}, + }, + { + Field: "api_key", + Formats: []FieldMaskFormat{FieldMaskFormatJSON, FieldMaskFormatURLEncoded}, + }, + { + Field: "CreditCard", + Formats: []FieldMaskFormat{FieldMaskFormatJSON, FieldMaskFormatURLEncoded}, + }, + { + Field: "credit_card", + Formats: []FieldMaskFormat{FieldMaskFormatJSON, FieldMaskFormatURLEncoded}, + }, + { + Field: "SSN", + Formats: []FieldMaskFormat{FieldMaskFormatJSON, FieldMaskFormatURLEncoded}, + }, + { + Field: "ssn", + Formats: []FieldMaskFormat{FieldMaskFormatJSON, FieldMaskFormatURLEncoded}, + }, + { + Field: "SocialSecurityNumber", + Formats: []FieldMaskFormat{FieldMaskFormatJSON, FieldMaskFormatURLEncoded}, + }, + { + Field: "social_security_number", + Formats: []FieldMaskFormat{FieldMaskFormatJSON, FieldMaskFormatURLEncoded}, + }, + { + Field: "Token", + Formats: []FieldMaskFormat{FieldMaskFormatJSON, FieldMaskFormatURLEncoded}, + }, + { + Field: "token", + Formats: []FieldMaskFormat{FieldMaskFormatJSON, FieldMaskFormatURLEncoded}, + }, + { + Field: "SessionID", + Formats: []FieldMaskFormat{FieldMaskFormatJSON, FieldMaskFormatURLEncoded}, + }, + { + Field: "session_id", + Formats: []FieldMaskFormat{FieldMaskFormatJSON, FieldMaskFormatURLEncoded}, + }, + { + Field: "Secret", + Formats: []FieldMaskFormat{FieldMaskFormatJSON, FieldMaskFormatURLEncoded}, + }, + { + Field: "secret", + Formats: []FieldMaskFormat{FieldMaskFormatJSON, FieldMaskFormatURLEncoded}, + }, + { + Field: "AuthToken", + Formats: []FieldMaskFormat{FieldMaskFormatJSON, FieldMaskFormatURLEncoded}, + }, + { + Field: "auth_token", + Formats: []FieldMaskFormat{FieldMaskFormatJSON, FieldMaskFormatURLEncoded}, + }, + { + Field: "AccessKey", + Formats: []FieldMaskFormat{FieldMaskFormatJSON, FieldMaskFormatURLEncoded}, + }, + { + Field: "access_key", + Formats: []FieldMaskFormat{FieldMaskFormatJSON, FieldMaskFormatURLEncoded}, + }, + { + Field: "PrivateKey", + Formats: []FieldMaskFormat{FieldMaskFormatJSON, FieldMaskFormatURLEncoded}, + }, + { + Field: "private_key", + Formats: []FieldMaskFormat{FieldMaskFormatJSON, FieldMaskFormatURLEncoded}, + }, + { + Field: "EncryptionKey", + Formats: []FieldMaskFormat{FieldMaskFormatJSON, FieldMaskFormatURLEncoded}, + }, + { + Field: "encryption_key", + Formats: []FieldMaskFormat{FieldMaskFormatJSON, FieldMaskFormatURLEncoded}, + }, + { + Field: "DatabasePassword", + Formats: []FieldMaskFormat{FieldMaskFormatJSON, FieldMaskFormatURLEncoded}, + }, + { + Field: "database_password", + Formats: []FieldMaskFormat{FieldMaskFormatJSON, FieldMaskFormatURLEncoded}, + }, + { + Field: "DbPassword", + Formats: []FieldMaskFormat{FieldMaskFormatJSON, FieldMaskFormatURLEncoded}, + }, + { + Field: "db_password", + Formats: []FieldMaskFormat{FieldMaskFormatJSON, FieldMaskFormatURLEncoded}, + }, + { + Field: "FtpPassword", + Formats: []FieldMaskFormat{FieldMaskFormatJSON, FieldMaskFormatURLEncoded}, + }, + { + Field: "ftp_password", + Formats: []FieldMaskFormat{FieldMaskFormatJSON, FieldMaskFormatURLEncoded}, + }, + { + Field: "SSHKey", + Formats: []FieldMaskFormat{FieldMaskFormatJSON, FieldMaskFormatURLEncoded}, + }, + { + Field: "ssh_key", + Formats: []FieldMaskFormat{FieldMaskFormatJSON, FieldMaskFormatURLEncoded}, + }, + { + Field: "OAuthToken", + Formats: []FieldMaskFormat{FieldMaskFormatJSON, FieldMaskFormatURLEncoded}, + }, + { + Field: "oauth_token", + Formats: []FieldMaskFormat{FieldMaskFormatJSON, FieldMaskFormatURLEncoded}, + }, + { + Field: "BearerToken", + Formats: []FieldMaskFormat{FieldMaskFormatJSON, FieldMaskFormatURLEncoded}, + }, + { + Field: "bearer_token", + Formats: []FieldMaskFormat{FieldMaskFormatJSON, FieldMaskFormatURLEncoded}, + }, + { + Field: "JWT", + Formats: []FieldMaskFormat{FieldMaskFormatJSON, FieldMaskFormatURLEncoded}, + }, + { + Field: "jwt", + Formats: []FieldMaskFormat{FieldMaskFormatJSON, FieldMaskFormatURLEncoded}, + }, + { + Field: "VerificationCode", + Formats: []FieldMaskFormat{FieldMaskFormatJSON, FieldMaskFormatURLEncoded}, + }, + { + Field: "verification_code", + Formats: []FieldMaskFormat{FieldMaskFormatJSON, FieldMaskFormatURLEncoded}, + }, + { + Field: "OneTimePassword", + Formats: []FieldMaskFormat{FieldMaskFormatJSON, FieldMaskFormatURLEncoded}, + }, + { + Field: "one_time_password", + Formats: []FieldMaskFormat{FieldMaskFormatJSON, FieldMaskFormatURLEncoded}, + }, + { + Field: "OTP", + Formats: []FieldMaskFormat{FieldMaskFormatJSON, FieldMaskFormatURLEncoded}, + }, + { + Field: "BearerToken", + Formats: []FieldMaskFormat{FieldMaskFormatJSON, FieldMaskFormatURLEncoded}, + }, + { + Field: "bearer_token", + Formats: []FieldMaskFormat{FieldMaskFormatJSON, FieldMaskFormatURLEncoded}, + }, + { + Field: "JWT", + Formats: []FieldMaskFormat{FieldMaskFormatJSON, FieldMaskFormatURLEncoded}, + }, + { + Field: "jwt", + Formats: []FieldMaskFormat{FieldMaskFormatJSON, FieldMaskFormatURLEncoded}, + }, + { + Field: "VerificationCode", + Formats: []FieldMaskFormat{FieldMaskFormatJSON, FieldMaskFormatURLEncoded}, + }, + { + Field: "verification_code", + Formats: []FieldMaskFormat{FieldMaskFormatJSON, FieldMaskFormatURLEncoded}, + }, + { + Field: "OneTimePassword", + Formats: []FieldMaskFormat{FieldMaskFormatJSON, FieldMaskFormatURLEncoded}, + }, + { + Field: "one_time_password", + Formats: []FieldMaskFormat{FieldMaskFormatJSON, FieldMaskFormatURLEncoded}, + }, + { + Field: "OTP", + Formats: []FieldMaskFormat{FieldMaskFormatJSON, FieldMaskFormatURLEncoded}, + }, + { + Field: "otp", + Formats: []FieldMaskFormat{FieldMaskFormatJSON, FieldMaskFormatURLEncoded}, + }, + // --- Дополнительные маски --- + { + Field: "Email", + Formats: []FieldMaskFormat{FieldMaskFormatJSON, FieldMaskFormatURLEncoded}, + }, + { + Field: "email", + Formats: []FieldMaskFormat{FieldMaskFormatJSON, FieldMaskFormatURLEncoded}, + }, + { + Field: "EmailAddress", + Formats: []FieldMaskFormat{FieldMaskFormatJSON, FieldMaskFormatURLEncoded}, + }, + { + Field: "email_address", + Formats: []FieldMaskFormat{FieldMaskFormatJSON, FieldMaskFormatURLEncoded}, + }, + { + Field: "PhoneNumber", + Formats: []FieldMaskFormat{FieldMaskFormatJSON, FieldMaskFormatURLEncoded}, + }, + { + Field: "phone_number", + Formats: []FieldMaskFormat{FieldMaskFormatJSON, FieldMaskFormatURLEncoded}, + }, + { + Field: "Phone", + Formats: []FieldMaskFormat{FieldMaskFormatJSON, FieldMaskFormatURLEncoded}, + }, + { + Field: "phone", + Formats: []FieldMaskFormat{FieldMaskFormatJSON, FieldMaskFormatURLEncoded}, + }, + { + Field: "CreditCardNumber", + Formats: []FieldMaskFormat{FieldMaskFormatJSON, FieldMaskFormatURLEncoded}, + }, + { + Field: "credit_card_number", + Formats: []FieldMaskFormat{FieldMaskFormatJSON, FieldMaskFormatURLEncoded}, + }, + { + Field: "CVV", + Formats: []FieldMaskFormat{FieldMaskFormatJSON, FieldMaskFormatURLEncoded}, + }, + { + Field: "cvv", + Formats: []FieldMaskFormat{FieldMaskFormatJSON, FieldMaskFormatURLEncoded}, + }, + { + Field: "CVC", + Formats: []FieldMaskFormat{FieldMaskFormatJSON, FieldMaskFormatURLEncoded}, + }, + { + Field: "cvc", + Formats: []FieldMaskFormat{FieldMaskFormatJSON, FieldMaskFormatURLEncoded}, + }, + { + Field: "ExpirationDate", + Formats: []FieldMaskFormat{FieldMaskFormatJSON, FieldMaskFormatURLEncoded}, + }, + { + Field: "expiration_date", + Formats: []FieldMaskFormat{FieldMaskFormatJSON, FieldMaskFormatURLEncoded}, + }, + { + Field: "CardNumber", + Formats: []FieldMaskFormat{FieldMaskFormatJSON, FieldMaskFormatURLEncoded}, + }, + { + Field: "card_number", + Formats: []FieldMaskFormat{FieldMaskFormatJSON, FieldMaskFormatURLEncoded}, + }, + { + Field: "BankAccount", + Formats: []FieldMaskFormat{FieldMaskFormatJSON, FieldMaskFormatURLEncoded}, + }, + { + Field: "bank_account", + Formats: []FieldMaskFormat{FieldMaskFormatJSON, FieldMaskFormatURLEncoded}, + }, + { + Field: "BankAccountNumber", + Formats: []FieldMaskFormat{FieldMaskFormatJSON, FieldMaskFormatURLEncoded}, + }, + { + Field: "bank_account_number", + Formats: []FieldMaskFormat{FieldMaskFormatJSON, FieldMaskFormatURLEncoded}, + }, + { + Field: "BankRoutingNumber", + Formats: []FieldMaskFormat{FieldMaskFormatJSON, FieldMaskFormatURLEncoded}, + }, + { + Field: "bank_routing_number", + Formats: []FieldMaskFormat{FieldMaskFormatJSON, FieldMaskFormatURLEncoded}, + }, + { + Field: "CreditScore", + Formats: []FieldMaskFormat{FieldMaskFormatJSON, FieldMaskFormatURLEncoded}, + }, + { + Field: "credit_score", + Formats: []FieldMaskFormat{FieldMaskFormatJSON, FieldMaskFormatURLEncoded}, + }, + { + Field: "HealthInsuranceNumber", + Formats: []FieldMaskFormat{FieldMaskFormatJSON, FieldMaskFormatURLEncoded}, + }, + { + Field: "health_insurance_number", + Formats: []FieldMaskFormat{FieldMaskFormatJSON, FieldMaskFormatURLEncoded}, + }, + { + Field: "DriversLicenseNumber", + Formats: []FieldMaskFormat{FieldMaskFormatJSON, FieldMaskFormatURLEncoded}, + }, + { + Field: "drivers_license_number", + Formats: []FieldMaskFormat{FieldMaskFormatJSON, FieldMaskFormatURLEncoded}, + }, + { + Field: "PassportNumber", + Formats: []FieldMaskFormat{FieldMaskFormatJSON, FieldMaskFormatURLEncoded}, + }, + { + Field: "passport_number", + Formats: []FieldMaskFormat{FieldMaskFormatJSON, FieldMaskFormatURLEncoded}, + }, + { + Field: "TaxID", + Formats: []FieldMaskFormat{FieldMaskFormatJSON, FieldMaskFormatURLEncoded}, + }, + { + Field: "tax_id", + Formats: []FieldMaskFormat{FieldMaskFormatJSON, FieldMaskFormatURLEncoded}, + }, + { + Field: "PII", + Formats: []FieldMaskFormat{FieldMaskFormatJSON, FieldMaskFormatURLEncoded}, + }, + { + Field: "pii", + Formats: []FieldMaskFormat{FieldMaskFormatJSON, FieldMaskFormatURLEncoded}, + }, + { + Field: "PIIData", + Formats: []FieldMaskFormat{FieldMaskFormatJSON, FieldMaskFormatURLEncoded}, + }, + { + Field: "pii_data", + Formats: []FieldMaskFormat{FieldMaskFormatJSON, FieldMaskFormatURLEncoded}, + }, + { + Field: "FinancialInfo", + Formats: []FieldMaskFormat{FieldMaskFormatJSON, FieldMaskFormatURLEncoded}, + }, + { + Field: "financial_info", + Formats: []FieldMaskFormat{FieldMaskFormatJSON, FieldMaskFormatURLEncoded}, + }, + { + Field: "MedicalRecord", + Formats: []FieldMaskFormat{FieldMaskFormatJSON, FieldMaskFormatURLEncoded}, + }, + { + Field: "medical_record", + Formats: []FieldMaskFormat{FieldMaskFormatJSON, FieldMaskFormatURLEncoded}, + }, + { + Field: "BiometricData", + Formats: []FieldMaskFormat{FieldMaskFormatJSON, FieldMaskFormatURLEncoded}, + }, + { + Field: "biometric_data", + Formats: []FieldMaskFormat{FieldMaskFormatJSON, FieldMaskFormatURLEncoded}, + }, + { + Field: "LocationData", + Formats: []FieldMaskFormat{FieldMaskFormatJSON, FieldMaskFormatURLEncoded}, + }, + { + Field: "location_data", + Formats: []FieldMaskFormat{FieldMaskFormatJSON, FieldMaskFormatURLEncoded}, + }, + { + Field: "DeviceID", + Formats: []FieldMaskFormat{FieldMaskFormatJSON, FieldMaskFormatURLEncoded}, + }, + { + Field: "device_id", + Formats: []FieldMaskFormat{FieldMaskFormatJSON, FieldMaskFormatURLEncoded}, + }, + { + Field: "MacAddress", + Formats: []FieldMaskFormat{FieldMaskFormatJSON, FieldMaskFormatURLEncoded}, + }, + { + Field: "mac_address", + Formats: []FieldMaskFormat{FieldMaskFormatJSON, FieldMaskFormatURLEncoded}, + }, + { + Field: "Geolocation", + Formats: []FieldMaskFormat{FieldMaskFormatJSON, FieldMaskFormatURLEncoded}, + }, + { + Field: "geolocation", + Formats: []FieldMaskFormat{FieldMaskFormatJSON, FieldMaskFormatURLEncoded}, + }, +} +func TestCustomLargeAmountOfMasks(t *testing.T) { + tests := []struct { + name, s, expected string + }{ + { + name: "simple", + s: "POST /idp/token HTTP/1.1\r\nHost: example.com\r\nUser-Agent: test-agent\r\nContent-Length: 3691\r\nContent-Type: application/x-www-form-urlencoded\r\nAccept-Encoding: gzip\r\n\r\nclient_secret=eyJhbGciOiJSUzI1NiIsImVhcCI6MSwiaXJpIjoiY2hlNWphMmowaW9kN3E0c21kbDAiLCJraWQiOiU1NzVkYjAifQ.eyJhdWQiOiJ1cy1jbG91ZC5hY3JvbmlzLmNvbSIs7QI0ctcs7ZN8OsCDUxhM4liWPGg&grant_type=urn%3Aietf%3Aparams%3Aoauth%3Agrant-type%3Ajwt-bearer&scope=scope1%3Atenant-id%0000000000-0000-0000-0000-000000000000", + expected: "POST /idp/token HTTP/1.1\r\nHost: example.com\r\nUser-Agent: test-agent\r\nContent-Length: 3691\r\nContent-Type: application/x-www-form-urlencoded\r\nAccept-Encoding: gzip\r\n\r\nclient_secret=***&grant_type=urn%3Aietf%3Aparams%3Aoauth%3Agrant-type%3Ajwt-bearer&scope=scope1%3Atenant-id%0000000000-0000-0000-0000-000000000000", + }, + { + name: "short", + s: "POST /idp/token HTTP/1.1\r\nHost: example.com\r\nUser-Agent: test-agent\r\nContent-Length: 3691\r\nContent-Type: application/x-www-form-urlencoded\r\nAccept-Encoding: gzip\r\n\r\nclient_secret=eyJhbGciOiJSUzI1NiIsImVhcCI6MSwiaXJpIjoiY2hlNWphMmowaW9kN3E0c21kbDAiLCJraWQiOiU1NzVkYjAifQ.eyJhdWQiOiJ1cy1jbG91ZC5hY3JvbmlzLmNvbSIs7QI0ctcs7ZN8OsCDUxhM4liWPGg", + expected: "POST /idp/token HTTP/1.1\r\nHost: example.com\r\nUser-Agent: test-agent\r\nContent-Length: 3691\r\nContent-Type: application/x-www-form-urlencoded\r\nAccept-Encoding: gzip\r\n\r\nclient_secret=***", + }, + { + name: "after", + s: "POST /idp/token HTTP/1.1\r\nHost: example.com\r\nUser-Agent: test-agent\r\nContent-Length: 3691\r\nContent-Type: application/x-www-form-urlencoded\r\nAccept-Encoding: gzip\r\n\r\ngrant_type=urn%3Aietf%3Aparams%3Aoauth%3Agrant-type%3Ajwt-bearer&scope=scope1%3Atenant-id%0000000000-0000-0000-0000-000000000000&client_secret=eyJhbGciOiJSUzI1NiIsImVhcCI6MSwiaXJpIjoiY2hlNWphMmowaW9kN3E0c21kbDAiLCJraWQiOiU1NzVkYjAifQ.eyJhdWQiOiJ1cy1jbG91ZC5hY3JvbmlzLmNvbSIs7QI0ctcs7ZN8OsCDUxhM4liWPGg", + expected: "POST /idp/token HTTP/1.1\r\nHost: example.com\r\nUser-Agent: test-agent\r\nContent-Length: 3691\r\nContent-Type: application/x-www-form-urlencoded\r\nAccept-Encoding: gzip\r\n\r\ngrant_type=urn%3Aietf%3Aparams%3Aoauth%3Agrant-type%3Ajwt-bearer&scope=scope1%3Atenant-id%0000000000-0000-0000-0000-000000000000&client_secret=***", + }, + { + name: "middle", + s: "POST /idp/token HTTP/1.1\r\nHost: example.com\r\nUser-Agent: test-agent\r\nContent-Length: 3691\r\nContent-Type: application/x-www-form-urlencoded\r\nAccept-Encoding: gzip\r\n\r\ngrant_type=urn%3Aietf%3Aparams%3Aoauth%3Agrant-type%3Ajwt-bearer&VerificationCode=ABCD&scope=scope1%3Atenant-id%0000000000-0000-0000-0000-000000000000", + expected: "POST /idp/token HTTP/1.1\r\nHost: example.com\r\nUser-Agent: test-agent\r\nContent-Length: 3691\r\nContent-Type: application/x-www-form-urlencoded\r\nAccept-Encoding: gzip\r\n\r\ngrant_type=urn%3Aietf%3Aparams%3Aoauth%3Agrant-type%3Ajwt-bearer&VerificationCode=***&scope=scope1%3Atenant-id%0000000000-0000-0000-0000-000000000000", + }, + } + + masker := NewMasker(testMMasks) + for _, test := range tests { + subtest := test + t.Run(subtest.name, func(t *testing.T) { + // Enable parallel execution to check races + t.Parallel() + + out := masker.Mask(subtest.s) + require.Equal(t, subtest.expected, out) + }) + } +} + +func TestHybridMasker(t *testing.T) { + tests := []struct { + name, s, expected string + }{ + { + name: "simple", + s: "POST /idp/token HTTP/1.1\r\nHost: example.com\r\nUser-Agent: test-agent\r\nContent-Length: 3691\r\nContent-Type: application/x-www-form-urlencoded\r\nAccept-Encoding: gzip\r\n\r\nclient_secret=eyJhbGciOiJSUzI1NiIsImVhcCI6MSwiaXJpIjoiY2hlNWphMmowaW9kN3E0c21kbDAiLCJraWQiOiU1NzVkYjAifQ.eyJhdWQiOiJ1cy1jbG91ZC5hY3JvbmlzLmNvbSIs7QI0ctcs7ZN8OsCDUxhM4liWPGg&grant_type=urn%3Aietf%3Aparams%3Aoauth%3Agrant-type%3Ajwt-bearer&scope=scope1%3Atenant-id%0000000000-0000-0000-0000-000000000000&AAAAA", + expected: "POST /idp/token HTTP/1.1\r\nHost: example.com\r\nUser-Agent: test-agent\r\nContent-Length: 3691\r\nContent-Type: application/x-www-form-urlencoded\r\nAccept-Encoding: gzip\r\n\r\nclient_secret=***&grant_type=urn%3Aietf%3Aparams%3Aoauth%3Agrant-type%3Ajwt-bearer&scope=scope1%3Atenant-id%0000000000-0000-0000-0000-000000000000&*****", + }, + { + name: "middle", + s: "POST /idp/token HTTP/1.1\r\nHost: example.com\r\nUser-Agent: test-agent\r\nAAAAA\r\nContent-Length: 3691\r\nContent-Type: application/x-www-form-urlencoded\r\nAccept-Encoding: gzip\r\n\r\ngrant_type=urn%3Aietf%3Aparams%3Aoauth%3Agrant-type%3Ajwt-bearer&client_secret=eyJhbGciOiJSUzI1NiIsImVhcCI6MSwiaXJpIjoiY2hlNWphMmowaW9kN3E0c21kbDAiLCJraWQiOiU1NzVkYjAifQ.eyJhdWQiOiJ1cy1jbG91ZC5hY3JvbmlzLmNvbSIs7QI0ctcs7ZN8OsCDUxhM4liWPGg&scope=scope1%3Atenant-id%0000000000-0000-0000-0000-000000000000", + expected: "POST /idp/token HTTP/1.1\r\nHost: example.com\r\nUser-Agent: test-agent\r\n*****\r\nContent-Length: 3691\r\nContent-Type: application/x-www-form-urlencoded\r\nAccept-Encoding: gzip\r\n\r\ngrant_type=urn%3Aietf%3Aparams%3Aoauth%3Agrant-type%3Ajwt-bearer&client_secret=***&scope=scope1%3Atenant-id%0000000000-0000-0000-0000-000000000000", + }, + } + + masksForTest := append(testMMasks, MaskingRuleConfig{Masks: []MaskConfig{{`AAAAA`, `*****`}}}) + + masker := NewMasker(masksForTest) + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + out := masker.Mask(test.s) + require.Equal(t, test.expected, out) + }) + } +} + +func TestHybridMaskerWithShuffle(t *testing.T) { + tests := []struct { + name, s, expected string + }{ + { + name: "simple", + s: "POST /idp/token HTTP/1.1\r\nHost: example.com\r\nBBBBBUser-Agent: test-agent\r\nContent-Length: 3691\r\nDDDDDContent-Type: application/x-www-form-urlencoded\r\nAccept-Encoding: gzip\r\n\r\nclient_secret=eyJhbGciOiJSUzI1NiIsImVhcCI6MSwiaXJpIjoiY2hlNWphMmowaW9kN3E0c21kbDAiLCJraWQiOiU1NzVkYjAifQ.eyJhdWQiOiJ1cy1jbG91ZC5hY3JvbmlzLmNvbSIs7QI0ctcs7ZN8OsCDUxhM4liWPGg&grant_type=urn%3Aietf%3Aparams%3Aoauth%3Agrant-type%3Ajwt-bearer&scope=scope1%3Atenant-id%0000000000-0000-0000-0000-000000000000&AAAAA", + expected: "POST /idp/token HTTP/1.1\r\nHost: example.com\r\n*****User-Agent: test-agent\r\nContent-Length: 3691\r\n*****Content-Type: application/x-www-form-urlencoded\r\nAccept-Encoding: gzip\r\n\r\nclient_secret=***&grant_type=urn%3Aietf%3Aparams%3Aoauth%3Agrant-type%3Ajwt-bearer&scope=scope1%3Atenant-id%0000000000-0000-0000-0000-000000000000&*****", + }, + { + name: "middle", + s: "POST /idp/token HTTP/1.1\r\nHost: example.com\r\nUser-Agent: test-agent\r\nAAAAA\r\nContent-Length: 3691\r\nContent-Type: application/x-www-form-urlencoded\r\nAccept-Encoding: gzip\r\n\r\ngrant_type=urn%3Aietf%3Aparams%3Aoauth%3Agrant-type%3Ajwt-bearer&client_secret=eyJhbGciOiJSUzI1NiIsImVhcCI6MSwiaXJpIjoiY2hlNWphMmowaW9kN3E0c21kbDAiLCJraWQiOiU1NzVkYjAifQ.eyJhdWQiOiJ1cy1jbG91ZC5hY3JvbmlzLmNvbSIs7QI0ctcs7ZN8OsCDUxhM4liWPGg&scope=scope1%3Atenant-id%0000000000-0000-0000-0000-000000000000", + expected: "POST /idp/token HTTP/1.1\r\nHost: example.com\r\nUser-Agent: test-agent\r\n*****\r\nContent-Length: 3691\r\nContent-Type: application/x-www-form-urlencoded\r\nAccept-Encoding: gzip\r\n\r\ngrant_type=urn%3Aietf%3Aparams%3Aoauth%3Agrant-type%3Ajwt-bearer&client_secret=***&scope=scope1%3Atenant-id%0000000000-0000-0000-0000-000000000000", + }, + } + + // shuffle + masksForTest := append(testMMasks, + MaskingRuleConfig{Masks: []MaskConfig{{`AAAAA`, `*****`}}}, + MaskingRuleConfig{Masks: []MaskConfig{{`BBBBB`, `*****`}}}, + MaskingRuleConfig{Masks: []MaskConfig{{`CCCCC`, `*****`}}}, + MaskingRuleConfig{Masks: []MaskConfig{{`DDDDD`, `*****`}}}, + ) + + masker := NewMasker(masksForTest) + for _, test := range tests { + for i := 0; i < 10; i++ { + rand.Shuffle(len(masksForTest), func(i, j int) { + masksForTest[i], masksForTest[j] = masksForTest[j], masksForTest[i] + }) + t.Run(test.name, func(t *testing.T) { + out := masker.Mask(test.s) + require.Equal(t, test.expected, out) + }) + } + } +} + +func BenchmarkMasker(b *testing.B) { + r := NewMasker(testMMasks) + b.ResetTimer() for _, test := range []struct{ name, text string }{ { name: "0 matches", - text: `{"passwSDFord": "abc", "clientSDF_secret": "clientkey123", "accesSDFSs_token": "accessToken123", "refreshSDF_token": "refresh123"}, assertSDFion=abcdef&client_sSDFecret=sjdlkfjl&refreSDFsh_token=sjdkjlk&api_kSDFey=lskdjflksjdl& AuthorSDFization: Bearer ABC`, + text: `{"passwSDFord": "abc", "clientSDF_secret": "clientkey123", "accesSDFSs_token": "accessToken123", "refreshSDF_token": "refresh123"}, assertSDFion=abcdef&client_sSDFecret=sjdlkfjl&refreSDFsh_token=sjdkjlk&api_kSDFey=lskdjflksjdl& AuthorSDFization: Bearer ABC&pwWd=GGGG&sOlt=HHHH&tenNant=123123123`, }, { name: "1 match", - text: `{"passwSDFord": "abc", "clientSDF_secret": "clientkey123", "accesSDFSs_token": "accessToken123", "refreshSDF_token": "refresh123"}, assertSDFion=abcdef&client_sSDFecret=sjdlkfjl&refreSDFsh_token=sjdkjlk&id_token=lskdjflksjdl& AuthorSDFization: Bearer ABC`, + text: `{"passwSDFord": "abc", "clientSDF_secret": "clientkey123", "accesSDFSs_token": "accessToken123", "refreshSDF_token": "refresh123"}, assertSDFion=abcdef&client_sSDFecret=sjdlkfjl&refreSDFsh_token=sjdkjlk&id_token=lskdjflksjdl& AuthorSDFization: Bearer ABC&pwWd=GGGG&sOlt=HHHH&tenNant=123123123`, }, { - name: "2 matches", - text: `{"passwSDFord": "abc", "clientSDF_secret": "clientkey123", "accesSDFSs_token": "accessToken123", "refreshSDF_token": "refresh123"}, assertSDFion=abcdef&client_sSDFecret=sjdlkfjl&refresh_token=sjdkjlk&id_token=lskdjflksjdl& AuthorSDFization: Bearer ABC`, + name: "3 matches", + text: `{"passwSDFord": "abc", "clientSDF_secret": "clientkey123", "accesSDFSs_token": "accessToken123", "refreshSDF_token": "refresh123"}, assertSDFion=abcdef&client_sSDFecret=sjdlkfjl&refresh_token=sjdkjlk&id_token=lskdjflksjdl& AuthorSDFization: Bearer ABC&&pwWd=GGGG&sOlt=HHHH&tenant=123123123`, }, } { b.Run(test.name, func(b *testing.B) { @@ -196,3 +805,68 @@ func BenchmarkMasker(b *testing.B) { }) } } + +func BenchmarkMaskerWithContains(b *testing.B) { + r := NewMasker(testMMasks) + + maskContains := func(r *Masker, s string) string { + lower := strings.ToLower(s) + for _, fieldMask := range r.fieldMasks { + if strings.Contains(lower, fieldMask.Field) { + for _, rep := range fieldMask.Masks { + s = rep.RegExp.ReplaceAllString(s, rep.Mask) + } + } + } + return s + } + + b.ResetTimer() + for _, test := range []struct{ name, text string }{ + { + name: "0 matches", + text: `{"passwSDFord": "abc", "clientSDF_secret": "clientkey123", "accesSDFSs_token": "accessToken123", "refreshSDF_token": "refresh123"}, assertSDFion=abcdef&client_sSDFecret=sjdlkfjl&refreSDFsh_token=sjdkjlk&api_kSDFey=lskdjflksjdl& AuthorSDFization: Bearer ABC&pwWd=GGGG&sOlt=HHHH&tenNant=123123123`, + }, + { + name: "1 match", + text: `{"passwSDFord": "abc", "clientSDF_secret": "clientkey123", "accesSDFSs_token": "accessToken123", "refreshSDF_token": "refresh123"}, assertSDFion=abcdef&client_sSDFecret=sjdlkfjl&refreSDFsh_token=sjdkjlk&id_token=lskdjflksjdl& AuthorSDFization: Bearer ABC&pwWd=GGGG&sOlt=HHHH&tenNant=123123123`, + }, + { + name: "3 matches", + text: `{"passwSDFord": "abc", "clientSDF_secret": "clientkey123", "accesSDFSs_token": "accessToken123", "refreshSDF_token": "refresh123"}, assertSDFion=abcdef&client_sSDFecret=sjdlkfjl&refresh_token=sjdkjlk&id_token=lskdjflksjdl& AuthorSDFization: Bearer ABC&&pwWd=GGGG&sOlt=HHHH&tenant=123123123`, + }, + } { + b.Run(test.name, func(b *testing.B) { + for i := 0; i < b.N; i++ { + maskContains(r, test.text) + } + }) + } +} + +func BenchmarkParallelMasker(b *testing.B) { + r := NewMasker(testMMasks) + b.ResetTimer() + for _, test := range []struct{ name, text string }{ + { + name: "0 matches", + text: `{"passwSDFord": "abc", "clientSDF_secret": "clientkey123", "accesSDFSs_token": "accessToken123", "refreshSDF_token": "refresh123"}, assertSDFion=abcdef&client_sSDFecret=sjdlkfjl&refreSDFsh_token=sjdkjlk&api_kSDFey=lskdjflksjdl& AuthorSDFization: Bearer ABC&pwWd=GGGG&sOlt=HHHH&tenNant=123123123`, + }, + { + name: "1 match", + text: `{"passwSDFord": "abc", "clientSDF_secret": "clientkey123", "accesSDFSs_token": "accessToken123", "refreshSDF_token": "refresh123"}, assertSDFion=abcdef&client_sSDFecret=sjdlkfjl&refreSDFsh_token=sjdkjlk&id_token=lskdjflksjdl& AuthorSDFization: Bearer ABC&pwWd=GGGG&sOlt=HHHH&tenNant=123123123`, + }, + { + name: "3 matches", + text: `{"passwSDFord": "abc", "clientSDF_secret": "clientkey123", "accesSDFSs_token": "accessToken123", "refreshSDF_token": "refresh123"}, assertSDFion=abcdef&client_sSDFecret=sjdlkfjl&refresh_token=sjdkjlk&id_token=lskdjflksjdl& AuthorSDFization: Bearer ABC&&pwWd=GGGG&sOlt=HHHH&tenant=123123123`, + }, + } { + b.RunParallel(func(pb *testing.PB) { + for pb.Next() { + for i := 0; i < b.N; i++ { + r.Mask(test.text) + } + } + }) + } +}