From 74875841566ed8f235db0e2598fb963837087393 Mon Sep 17 00:00:00 2001 From: Lucas Medeiros Date: Mon, 2 Aug 2021 06:13:00 -0300 Subject: [PATCH 1/2] support arguments to fake functions and remove incompatible functions from list --- README.md | 5 +- pkg/anonymiser/anonymiser.go | 60 ++++++++++++++++++- pkg/anonymiser/anonymiser_test.go | 98 +++++++++++++++++++++++++++++++ pkg/anonymiser/fake.go | 5 -- 4 files changed, 160 insertions(+), 8 deletions(-) diff --git a/README.md b/README.md index ecdec59..985f2a3 100644 --- a/README.md +++ b/README.md @@ -211,6 +211,9 @@ You can anonymise specific columns in your table using the `Anonymise` key. Anon [Tables.Anonymise] email = "EmailAddress" firstName = "FirstName" + postalCode = "DigitsN:5" + creditCard = "CreditCardNum" + voucher = "Password:3:5:true" [[Tables]] Name = "users" @@ -219,7 +222,7 @@ You can anonymise specific columns in your table using the `Anonymise` key. Anon password = "literal:1234" ``` -This would replace these 4 columns from the `customer` and `users` tables and run `fake.EmailAddress` and `fake.FirstName` against them respectively. We can use `literal:[some-constant-value]` to specify a constant we want to write for a column. In this case, `password = "literal:1234"` would write `1234` for every row in the password column of the users table. +This would replace all the specified columns from the `customer` and `users` tables with the specified fake function. If a function requires arguments to be passed, we can specify them separated by the `:` character; the default value of the argument's type will be used in case the provided one is invalid or missing. There is also a special function `literal:[some-constant-value]` to specify a constant we want to write for a column. In this case, `password = "literal:1234"` would write `1234` for every row in the password column of the users table. 
#### Available data types for anonymisation diff --git a/pkg/anonymiser/anonymiser.go b/pkg/anonymiser/anonymiser.go index 7da589f..67c9d90 100644 --- a/pkg/anonymiser/anonymiser.go +++ b/pkg/anonymiser/anonymiser.go @@ -5,6 +5,7 @@ import ( "encoding/hex" "fmt" "reflect" + "strconv" "strings" "github.com/hellofresh/klepto/pkg/config" @@ -19,9 +20,19 @@ const ( literalPrefix = "literal:" email = "EmailAddress" username = "UserName" - password = "Password" ) +var requireArgs = map[string]bool{ + "CharactersN": true, + "DigitsN": true, + "ParagraphsN": true, + "SentencesN": true, + "WordsN": true, + "CreditCardNum": true, + "Password": true, + "Year": true, +} + type ( anonymiser struct { reader.Reader @@ -66,6 +77,7 @@ func (a *anonymiser) ReadTable(tableName string, rowChan chan<- database.Row, op continue } + fakerType, args := getTypeArgs(fakerType) faker, found := Functions[fakerType] if !found { logger.WithField("anonymiser", fakerType).Error("Anonymiser is not found") @@ -87,7 +99,7 @@ func (a *anonymiser) ReadTable(tableName string, rowChan chan<- database.Row, op hex.EncodeToString(b), ) default: - value = faker.Call([]reflect.Value{})[0].String() + value = faker.Call(args)[0].String() } row[column] = value } @@ -102,3 +114,47 @@ func (a *anonymiser) ReadTable(tableName string, rowChan chan<- database.Row, op return nil } + +func getTypeArgs(fakerType string) (string, []reflect.Value) { + parts := strings.Split(fakerType, ":") + fType := parts[0] + if !requireArgs[fType] { + return fType, nil + } + + return fType, parseArgs(Functions[fType], parts[1:]) +} + +func parseArgs(function reflect.Value, values []string) []reflect.Value { + t := function.Type() + argsN := t.NumIn() + if argsN > len(values) { + log.WithFields(log.Fields{"expected": argsN, "received": len(values)}).Warn("Not enough arguments passed. Falling back to defaults") + values = append(values, make([]string, argsN-len(values))...) 
+ } + + argsV := make([]reflect.Value, argsN) + for i := 0; i < argsN; i++ { + argT := t.In(i) + v := reflect.New(argT).Elem() + switch argT.Kind() { + case reflect.String: + v.SetString(values[i]) + case reflect.Int: + n, err := strconv.ParseInt(values[i], 10, 0) + if err != nil { + log.WithField("argument", values[i]).Warn("Failed to parse argument as string. Falling back to default") + } + v.SetInt(n) + case reflect.Bool: + b, err := strconv.ParseBool(values[i]) + if err != nil { + log.WithField("argument", values[i]).Warn("Failed to parse argument as boolean. Falling back to default") + } + v.SetBool(b) + } + + argsV[i] = v + } + return argsV +} diff --git a/pkg/anonymiser/anonymiser_test.go b/pkg/anonymiser/anonymiser_test.go index 3a060b5..3a514e0 100644 --- a/pkg/anonymiser/anonymiser_test.go +++ b/pkg/anonymiser/anonymiser_test.go @@ -54,6 +54,30 @@ func TestReadTable(t *testing.T) { opts: reader.ReadTableOpt{}, config: config.Tables{{Name: "test", Anonymise: map[string]string{"column_test": "Hello"}}}, }, + { + scenario: "when column anonymiser require args", + function: testWhenColumnAnonymiserRequireArgs, + opts: reader.ReadTableOpt{}, + config: config.Tables{{Name: "test", Anonymise: map[string]string{"column_test": "DigitsN:20"}}}, + }, + { + scenario: "when column anonymiser require multiple args", + function: testWhenColumnAnonymiserRequireMultipleArgs, + opts: reader.ReadTableOpt{}, + config: config.Tables{{Name: "test", Anonymise: map[string]string{"column_test": "Year:2020:2021"}}}, + }, + { + scenario: "when column anonymiser require args but no values are passed", + function: testWhenColumnAnonymiserRequireArgsNoValues, + opts: reader.ReadTableOpt{}, + config: config.Tables{{Name: "test", Anonymise: map[string]string{"column_test": "CreditCardNum"}}}, + }, + { + scenario: "when column anonymiser require args but the value passed is invalid", + function: testWhenColumnAnonymiserRequireArgsInvalidValues, + opts: reader.ReadTableOpt{}, + config: 
config.Tables{{Name: "test", Anonymise: map[string]string{"column_test1": "CharactersN:invalid", "column_test2": "Password:1:2:yes"}}}, + }, } for _, test := range tests { @@ -137,6 +161,80 @@ func testWhenColumnAnonymiserIsInvalid(t *testing.T, opts reader.ReadTableOpt, t } } +func testWhenColumnAnonymiserRequireArgs(t *testing.T, opts reader.ReadTableOpt, tables config.Tables) { + anonymiser := NewAnonymiser(&mockReader{}, tables) + + rowChan := make(chan database.Row) + defer close(rowChan) + + err := anonymiser.ReadTable("test", rowChan, opts) + require.NoError(t, err) + + timeoutChan := time.After(waitTimeout) + select { + case row := <-rowChan: + assert.NotEqual(t, "to_be_anonimised", row["column_test"]) + assert.Len(t, row["column_test"], 20) + case <-timeoutChan: + assert.FailNow(t, "Failing due to timeout") + } +} + +func testWhenColumnAnonymiserRequireMultipleArgs(t *testing.T, opts reader.ReadTableOpt, tables config.Tables) { + anonymiser := NewAnonymiser(&mockReader{}, tables) + + rowChan := make(chan database.Row) + defer close(rowChan) + + err := anonymiser.ReadTable("test", rowChan, opts) + require.NoError(t, err) + + timeoutChan := time.After(waitTimeout) + select { + case row := <-rowChan: + assert.NotEqual(t, "to_be_anonimised", row["column_test"]) + case <-timeoutChan: + assert.FailNow(t, "Failing due to timeout") + } +} + +func testWhenColumnAnonymiserRequireArgsNoValues(t *testing.T, opts reader.ReadTableOpt, tables config.Tables) { + anonymiser := NewAnonymiser(&mockReader{}, tables) + + rowChan := make(chan database.Row) + defer close(rowChan) + + err := anonymiser.ReadTable("test", rowChan, opts) + require.NoError(t, err) + + timeoutChan := time.After(waitTimeout) + select { + case row := <-rowChan: + assert.NotEqual(t, "to_be_anonimised", row["column_test"]) + case <-timeoutChan: + assert.FailNow(t, "Failing due to timeout") + } +} + +func testWhenColumnAnonymiserRequireArgsInvalidValues(t *testing.T, opts reader.ReadTableOpt, tables 
config.Tables) { + anonymiser := NewAnonymiser(&mockReader{}, tables) + + rowChan := make(chan database.Row) + defer close(rowChan) + + err := anonymiser.ReadTable("test", rowChan, opts) + require.NoError(t, err) + + timeoutChan := time.After(waitTimeout) + select { + case row := <-rowChan: + assert.NotEqual(t, "to_be_anonimised", row["column_test1"]) + assert.NotEqual(t, "to_be_anonimised", row["column_test2"]) + case <-timeoutChan: + assert.FailNow(t, "Failing due to timeout") + } +} + type mockReader struct{} func (m *mockReader) GetTables() ([]string, error) { return []string{"table_test"}, nil } diff --git a/pkg/anonymiser/fake.go b/pkg/anonymiser/fake.go index e6d860e..6de5b61 100644 --- a/pkg/anonymiser/fake.go +++ b/pkg/anonymiser/fake.go @@ -30,8 +30,6 @@ var Functions = map[string]reflect.Value{ "EmailAddress": reflect.ValueOf(fake.EmailAddress), "EmailBody": reflect.ValueOf(fake.EmailBody), "EmailSubject": reflect.ValueOf(fake.EmailSubject), - "EnFallback": reflect.ValueOf(fake.EnFallback), - "FS": reflect.ValueOf(fake.FS), "FemaleFirstName": reflect.ValueOf(fake.FemaleFirstName), "FemaleFullName": reflect.ValueOf(fake.FemaleFullName), "FemaleFullNameWithPrefix": reflect.ValueOf(fake.FemaleFullNameWithPrefix), @@ -81,11 +79,9 @@ var Functions = map[string]reflect.Value{ "Phone": reflect.ValueOf(fake.Phone), "Product": reflect.ValueOf(fake.Product), "ProductName": reflect.ValueOf(fake.ProductName), - "Seed": reflect.ValueOf(fake.Seed), "Sentence": reflect.ValueOf(fake.Sentence), "Sentences": reflect.ValueOf(fake.Sentences), "SentencesN": reflect.ValueOf(fake.SentencesN), - "SetLang": reflect.ValueOf(fake.SetLang), "SimplePassword": reflect.ValueOf(fake.SimplePassword), "State": reflect.ValueOf(fake.State), "StateAbbrev": reflect.ValueOf(fake.StateAbbrev), @@ -93,7 +89,6 @@ var Functions = map[string]reflect.Value{ "StreetAddress": reflect.ValueOf(fake.StreetAddress), "Title": reflect.ValueOf(fake.Title), "TopLevelDomain": 
reflect.ValueOf(fake.TopLevelDomain), - "UseExternalData": reflect.ValueOf(fake.UseExternalData), "UserAgent": reflect.ValueOf(fake.UserAgent), "UserName": reflect.ValueOf(fake.UserName), "WeekDay": reflect.ValueOf(fake.WeekDay), From c052835df6cab2cf368aa7fbe3014164b6d18da2 Mon Sep 17 00:00:00 2001 From: Lucas Medeiros Date: Mon, 2 Aug 2021 06:21:16 -0300 Subject: [PATCH 2/2] comment func signature for arguments documentation --- pkg/anonymiser/fake.go | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/pkg/anonymiser/fake.go b/pkg/anonymiser/fake.go index 6de5b61..9b48a1a 100644 --- a/pkg/anonymiser/fake.go +++ b/pkg/anonymiser/fake.go @@ -12,19 +12,19 @@ var Functions = map[string]reflect.Value{ "Brand": reflect.ValueOf(fake.Brand), "Character": reflect.ValueOf(fake.Character), "Characters": reflect.ValueOf(fake.Characters), - "CharactersN": reflect.ValueOf(fake.CharactersN), + "CharactersN": reflect.ValueOf(fake.CharactersN), //func fake.CharactersN(n int) string "City": reflect.ValueOf(fake.City), "Color": reflect.ValueOf(fake.Color), "Company": reflect.ValueOf(fake.Company), "Continent": reflect.ValueOf(fake.Continent), "Country": reflect.ValueOf(fake.Country), - "CreditCardNum": reflect.ValueOf(fake.CreditCardNum), + "CreditCardNum": reflect.ValueOf(fake.CreditCardNum), //func fake.CreditCardNum(vendor string) string "CreditCardType": reflect.ValueOf(fake.CreditCardType), "Currency": reflect.ValueOf(fake.Currency), "CurrencyCode": reflect.ValueOf(fake.CurrencyCode), "Day": reflect.ValueOf(fake.Day), "Digits": reflect.ValueOf(fake.Digits), - "DigitsN": reflect.ValueOf(fake.DigitsN), + "DigitsN": reflect.ValueOf(fake.DigitsN), //func fake.DigitsN(n int) string "DomainName": reflect.ValueOf(fake.DomainName), "DomainZone": reflect.ValueOf(fake.DomainZone), "EmailAddress": reflect.ValueOf(fake.EmailAddress), @@ -73,15 +73,15 @@ var Functions = map[string]reflect.Value{ "MonthShort": reflect.ValueOf(fake.MonthShort), "Paragraph": 
reflect.ValueOf(fake.Paragraph), "Paragraphs": reflect.ValueOf(fake.Paragraphs), - "ParagraphsN": reflect.ValueOf(fake.ParagraphsN), - "Password": reflect.ValueOf(fake.Password), + "ParagraphsN": reflect.ValueOf(fake.ParagraphsN), //func fake.ParagraphsN(n int) string + "Password": reflect.ValueOf(fake.Password), //func fake.Password(atLeast int, atMost int, allowUpper bool, allowNumeric bool, allowSpecial bool) string "Patronymic": reflect.ValueOf(fake.Patronymic), "Phone": reflect.ValueOf(fake.Phone), "Product": reflect.ValueOf(fake.Product), "ProductName": reflect.ValueOf(fake.ProductName), "Sentence": reflect.ValueOf(fake.Sentence), "Sentences": reflect.ValueOf(fake.Sentences), - "SentencesN": reflect.ValueOf(fake.SentencesN), + "SentencesN": reflect.ValueOf(fake.SentencesN), //func fake.SentencesN(n int) string "SimplePassword": reflect.ValueOf(fake.SimplePassword), "State": reflect.ValueOf(fake.State), "StateAbbrev": reflect.ValueOf(fake.StateAbbrev), @@ -96,7 +96,7 @@ var Functions = map[string]reflect.Value{ "WeekdayNum": reflect.ValueOf(fake.WeekdayNum), "Word": reflect.ValueOf(fake.Word), "Words": reflect.ValueOf(fake.Words), - "WordsN": reflect.ValueOf(fake.WordsN), - "Year": reflect.ValueOf(fake.Year), + "WordsN": reflect.ValueOf(fake.WordsN), //func fake.WordsN(n int) string + "Year": reflect.ValueOf(fake.Year), //func fake.Year(from int, to int) int "Zip": reflect.ValueOf(fake.Zip), }