Skip to content

Commit

Permalink
Merge pull request #130 from hellofresh/patch/EES-6173-func-args
Browse files Browse the repository at this point in the history
Support arguments on generation functions
  • Loading branch information
lucasmdrs authored Aug 2, 2021
2 parents 4b6cca5 + c052835 commit a0d9648
Show file tree
Hide file tree
Showing 4 changed files with 168 additions and 16 deletions.
5 changes: 4 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -211,6 +211,9 @@ You can anonymise specific columns in your table using the `Anonymise` key. Anon
[Tables.Anonymise]
email = "EmailAddress"
firstName = "FirstName"
postalCode = "DigitsN:5"
creditCard = "CreditCardNum"
voucher = "Password:3:5:true"

[[Tables]]
Name = "users"
Expand All @@ -219,7 +222,7 @@ You can anonymise specific columns in your table using the `Anonymise` key. Anon
password = "literal:1234"
```

This would replace these 4 columns from the `customer` and `users` tables and run `fake.EmailAddress` and `fake.FirstName` against them respectively. We can use `literal:[some-constant-value]` to specify a constant we want to write for a column. In this case, `password = "literal:1234"` would write `1234` for every row in the password column of the users table.
This would replace all the specified columns from the `customer` and `users` tables with the output of the specified fake function. If a function requires arguments, we can pass them separated by the `:` character; the default value of an argument's type will be used if the provided value is invalid or missing. There is also a special function `literal:[some-constant-value]` to specify a constant we want to write for a column. In this case, `password = "literal:1234"` would write `1234` for every row in the password column of the users table.

#### Available data types for anonymisation

Expand Down
60 changes: 58 additions & 2 deletions pkg/anonymiser/anonymiser.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ import (
"encoding/hex"
"fmt"
"reflect"
"strconv"
"strings"

"github.com/hellofresh/klepto/pkg/config"
Expand All @@ -19,9 +20,19 @@ const (
literalPrefix = "literal:"
email = "EmailAddress"
username = "UserName"
password = "Password"
)

// requireArgs is the set of faker function names that take arguments.
// getTypeArgs only parses the ":"-separated values that follow a name when
// the name is present here; any other name is called without arguments.
var requireArgs = map[string]bool{
	"CharactersN":   true,
	"CreditCardNum": true,
	"DigitsN":       true,
	"ParagraphsN":   true,
	"Password":      true,
	"SentencesN":    true,
	"WordsN":        true,
	"Year":          true,
}

type (
anonymiser struct {
reader.Reader
Expand Down Expand Up @@ -66,6 +77,7 @@ func (a *anonymiser) ReadTable(tableName string, rowChan chan<- database.Row, op
continue
}

fakerType, args := getTypeArgs(fakerType)
faker, found := Functions[fakerType]
if !found {
logger.WithField("anonymiser", fakerType).Error("Anonymiser is not found")
Expand All @@ -87,7 +99,7 @@ func (a *anonymiser) ReadTable(tableName string, rowChan chan<- database.Row, op
hex.EncodeToString(b),
)
default:
value = faker.Call([]reflect.Value{})[0].String()
value = faker.Call(args)[0].String()
}
row[column] = value
}
Expand All @@ -102,3 +114,47 @@ func (a *anonymiser) ReadTable(tableName string, rowChan chan<- database.Row, op

return nil
}

// getTypeArgs splits a configured anonymiser value on ":" into the faker
// function name (the first segment) and the arguments to call it with.
//
// Arguments are parsed only for names flagged in requireArgs. For every other
// name the returned argument slice is nil and anything after the first ":" is
// discarded.
func getTypeArgs(fakerType string) (string, []reflect.Value) {
	segments := strings.Split(fakerType, ":")
	name, rawArgs := segments[0], segments[1:]

	if needsArgs := requireArgs[name]; needsArgs {
		return name, parseArgs(Functions[name], rawArgs)
	}
	return name, nil
}

// parseArgs converts the textual arguments in values into the reflect.Values
// needed to call function.
//
// One value is produced per input parameter of function, in order. String
// parameters receive the text as-is; int and bool parameters are parsed with
// strconv. A value that is missing or cannot be parsed is logged as a warning
// and replaced by the zero value of the parameter type. Parameters of any
// other kind are always passed as their zero value. Surplus entries in values
// are ignored.
//
// If function is the zero reflect.Value or does not hold a func, nil is
// returned instead of panicking.
func parseArgs(function reflect.Value, values []string) []reflect.Value {
	// Type()/NumIn() panic on a zero Value (e.g. the result of a failed map
	// lookup) and on non-function kinds, so bail out before touching them.
	if !function.IsValid() || function.Kind() != reflect.Func {
		return nil
	}

	t := function.Type()
	argsN := t.NumIn()
	if argsN > len(values) {
		log.WithFields(log.Fields{"expected": argsN, "received": len(values)}).Warn("Not enough arguments passed. Falling back to defaults")
		// Pad with empty strings so values[i] is always in range below.
		values = append(values, make([]string, argsN-len(values))...)
	}

	argsV := make([]reflect.Value, argsN)
	for i := 0; i < argsN; i++ {
		argT := t.In(i)
		// Start from an addressable zero value of the parameter type.
		v := reflect.New(argT).Elem()
		switch argT.Kind() {
		case reflect.String:
			v.SetString(values[i])
		case reflect.Int:
			n, err := strconv.ParseInt(values[i], 10, 0)
			if err != nil {
				log.WithField("argument", values[i]).Warn("Failed to parse argument as integer. Falling back to default")
				// On a range error ParseInt returns the clamped max/min
				// value; reset so the promised default is really used.
				n = 0
			}
			v.SetInt(n)
		case reflect.Bool:
			// ParseBool already yields false on error, which is the default.
			b, err := strconv.ParseBool(values[i])
			if err != nil {
				log.WithField("argument", values[i]).Warn("Failed to parse argument as boolean. Falling back to default")
			}
			v.SetBool(b)
		}

		argsV[i] = v
	}
	return argsV
}
98 changes: 98 additions & 0 deletions pkg/anonymiser/anonymiser_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,30 @@ func TestReadTable(t *testing.T) {
opts: reader.ReadTableOpt{},
config: config.Tables{{Name: "test", Anonymise: map[string]string{"column_test": "Hello"}}},
},
{
scenario: "when column anonymiser require args",
function: testWhenColumnAnonymiserRequireArgs,
opts: reader.ReadTableOpt{},
config: config.Tables{{Name: "test", Anonymise: map[string]string{"column_test": "DigitsN:20"}}},
},
{
scenario: "when column anonymiser require multiple args",
function: testWhenColumnAnonymiserRequireMultipleArgs,
opts: reader.ReadTableOpt{},
config: config.Tables{{Name: "test", Anonymise: map[string]string{"column_test": "Year:2020:2021"}}},
},
{
scenario: "when column anonymiser require args but no values are passed",
function: testWhenColumnAnonymiserRequireArgsNoValues,
opts: reader.ReadTableOpt{},
config: config.Tables{{Name: "test", Anonymise: map[string]string{"column_test": "CreditCardNum"}}},
},
{
scenario: "when column anonymiser require args but the value passed is invalid",
function: testWhenColumnAnonymiserRequireArgsInvalidValues,
opts: reader.ReadTableOpt{},
config: config.Tables{{Name: "test", Anonymise: map[string]string{"column_test1": "CharactersN:invalid", "column_test2": "Password:1:2:yes"}}},
},
}

for _, test := range tests {
Expand Down Expand Up @@ -137,6 +161,80 @@ func testWhenColumnAnonymiserIsInvalid(t *testing.T, opts reader.ReadTableOpt, t
}
}

// testWhenColumnAnonymiserRequireArgs reads table "test" through an anonymiser
// wrapping mockReader and checks the first row received: column_test must no
// longer hold the placeholder value and must be exactly 20 long. The test
// fails if no row arrives within waitTimeout.
func testWhenColumnAnonymiserRequireArgs(t *testing.T, opts reader.ReadTableOpt, tables config.Tables) {
	rows := make(chan database.Row)
	defer close(rows)

	subject := NewAnonymiser(&mockReader{}, tables)
	require.NoError(t, subject.ReadTable("test", rows, opts))

	select {
	case <-time.After(waitTimeout):
		assert.FailNow(t, "Failing due to timeout")
	case got := <-rows:
		assert.NotEqual(t, "to_be_anonimised", got["column_test"])
		assert.Len(t, got["column_test"], 20)
	}
}

// testWhenColumnAnonymiserRequireMultipleArgs reads table "test" through an
// anonymiser wrapping mockReader and checks that column_test of the first row
// received no longer holds the placeholder value. The test fails if no row
// arrives within waitTimeout.
func testWhenColumnAnonymiserRequireMultipleArgs(t *testing.T, opts reader.ReadTableOpt, tables config.Tables) {
	rows := make(chan database.Row)
	defer close(rows)

	subject := NewAnonymiser(&mockReader{}, tables)
	require.NoError(t, subject.ReadTable("test", rows, opts))

	select {
	case <-time.After(waitTimeout):
		assert.FailNow(t, "Failing due to timeout")
	case got := <-rows:
		assert.NotEqual(t, "to_be_anonimised", got["column_test"])
	}
}

// testWhenColumnAnonymiserRequireArgsNoValues reads table "test" through an
// anonymiser wrapping mockReader and checks that column_test of the first row
// received no longer holds the placeholder value. The test fails if no row
// arrives within waitTimeout.
func testWhenColumnAnonymiserRequireArgsNoValues(t *testing.T, opts reader.ReadTableOpt, tables config.Tables) {
	rows := make(chan database.Row)
	defer close(rows)

	subject := NewAnonymiser(&mockReader{}, tables)
	require.NoError(t, subject.ReadTable("test", rows, opts))

	select {
	case <-time.After(waitTimeout):
		assert.FailNow(t, "Failing due to timeout")
	case got := <-rows:
		assert.NotEqual(t, "to_be_anonimised", got["column_test"])
	}
}

// testWhenColumnAnonymiserRequireArgsInvalidValues reads table "test" through
// an anonymiser wrapping mockReader and checks that neither column_test1 nor
// column_test2 of the first row received still holds the placeholder value.
// The test fails if no row arrives within waitTimeout.
func testWhenColumnAnonymiserRequireArgsInvalidValues(t *testing.T, opts reader.ReadTableOpt, tables config.Tables) {
	rows := make(chan database.Row)
	defer close(rows)

	subject := NewAnonymiser(&mockReader{}, tables)
	require.NoError(t, subject.ReadTable("test", rows, opts))

	select {
	case <-time.After(waitTimeout):
		assert.FailNow(t, "Failing due to timeout")
	case got := <-rows:
		for _, column := range []string{"column_test1", "column_test2"} {
			assert.NotEqual(t, "to_be_anonimised", got[column])
		}
	}
}

// mockReader is the stub reader handed to NewAnonymiser by the tests in this
// file.
type mockReader struct{}

// GetTables always reports the single table name "table_test" and a nil error.
func (*mockReader) GetTables() ([]string, error) {
	tables := []string{"table_test"}
	return tables, nil
}
Expand Down
21 changes: 8 additions & 13 deletions pkg/anonymiser/fake.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

0 comments on commit a0d9648

Please sign in to comment.