Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

NEOS-251: refactor email transformer and take our faker lib #475

Merged
merged 2 commits into from
Nov 1, 2023
Merged
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Next Next commit
refactor email transformer and take our faker lib
  • Loading branch information
evisdrenova committed Nov 1, 2023
commit 77a53790ae124eb9ae4b3ab9dbe7fc5da6c70c1c
171 changes: 137 additions & 34 deletions worker/internal/benthos/transformers/email.go
Original file line number Diff line number Diff line change
@@ -7,9 +7,11 @@

"github.com/benthosdev/benthos/v4/public/bloblang"
_ "github.com/benthosdev/benthos/v4/public/components/io"
"github.com/bxcodec/faker/v4"
transformer_utils "github.com/nucleuscloud/neosync/worker/internal/benthos/transformers/utils"
)

var tld = []string{"com", "org", "net", "edu", "gov", "app", "dev"}

func init() {

spec := bloblang.NewPluginSpec().
@@ -29,8 +31,9 @@
return nil, err
}

return bloblang.StringMethod(func(s string) (any, error) {
res, err := ProcessEmail(s, preserveLength, preserveDomain)
return bloblang.StringMethod(func(e string) (any, error) {

res, err := GenerateEmail(e, preserveLength, preserveDomain)
return res, err
}), nil
})
@@ -42,69 +45,169 @@
}

// main transformer logic goes here
func ProcessEmail(email string, preserveLength, preserveDomain bool) (string, error) {
func GenerateEmail(e string, preserveLength, preserveDomain bool) (string, error) {

var returnValue string
var err error

if !preserveLength && preserveDomain {

parsedEmail, err := parseEmail(email)
returnValue, err = GenerateEmailPreserveDomain(e, true)
if err != nil {
return "", err
}

Check warning on line 58 in worker/internal/benthos/transformers/email.go

Codecov / codecov/patch

worker/internal/benthos/transformers/email.go#L55-L58

Added lines #L55 - L58 were not covered by tests

} else if preserveLength && !preserveDomain {

returnValue, err = GenerateEmailPreserveLength(e, true)
if err != nil {
return "", err
}

Check warning on line 65 in worker/internal/benthos/transformers/email.go

Codecov / codecov/patch

worker/internal/benthos/transformers/email.go#L61-L65

Added lines #L61 - L65 were not covered by tests

} else if preserveLength && preserveDomain {

returnValue, err = GenerateEmailPreserveDomainAndLength(e, true, true)
if err != nil {
return "", err
}

Check warning on line 72 in worker/internal/benthos/transformers/email.go

Codecov / codecov/patch

worker/internal/benthos/transformers/email.go#L71-L72

Added lines #L71 - L72 were not covered by tests

} else {

un, err := GenerateRandomUsername()
if err != nil {
return "", nil
}

Check warning on line 79 in worker/internal/benthos/transformers/email.go

Codecov / codecov/patch

worker/internal/benthos/transformers/email.go#L78-L79

Added lines #L78 - L79 were not covered by tests

domain, err := GenerateDomain()
if err != nil {
return "", nil
}

Check warning on line 84 in worker/internal/benthos/transformers/email.go

Codecov / codecov/patch

worker/internal/benthos/transformers/email.go#L83-L84

Added lines #L83 - L84 were not covered by tests

// generate random email
returnValue = un + domain
}

return returnValue, nil
}

// Generate a random email and preserve the input email's domain
func GenerateEmailPreserveDomain(e string, pd bool) (string, error) {

parsedEmail, err := parseEmail(e)
if err != nil {
return "", fmt.Errorf("invalid email: %s", email)
return "", fmt.Errorf("invalid email: %s", e)

Check warning on line 98 in worker/internal/benthos/transformers/email.go

Codecov / codecov/patch

worker/internal/benthos/transformers/email.go#L98

Added line #L98 was not covered by tests
}

var returnValue string
un, err := GenerateRandomUsername()
if err != nil {
return "", nil
}

Check warning on line 104 in worker/internal/benthos/transformers/email.go

Codecov / codecov/patch

worker/internal/benthos/transformers/email.go#L103-L104

Added lines #L103 - L104 were not covered by tests

if preserveDomain && !preserveLength {
return strings.ToLower(un) + "@" + parsedEmail[1], err
}

returnValue = strings.ToLower(faker.Username()) + "@" + parsedEmail[1]
// Preserve the length of email but not the domain name
func GenerateEmailPreserveLength(e string, pl bool) (string, error) {

} else if preserveLength && !preserveDomain {
var res string

// preserve length of email but not the domain
parsedEmail, err := parseEmail(e)
if err != nil {
return "", fmt.Errorf("invalid email: %s", e)
}

Check warning on line 117 in worker/internal/benthos/transformers/email.go

Codecov / codecov/patch

worker/internal/benthos/transformers/email.go#L116-L117

Added lines #L116 - L117 were not covered by tests

splitDomain := strings.Split(parsedEmail[1], ".") // split the domain to account for different domain name lengths
// split the domain to account for different domain name lengths
splitDomain := strings.Split(parsedEmail[1], ".")

domain := sliceString(faker.DomainName(), len(splitDomain[0]))
domain, err := GenerateDomain()
if err != nil {
return "", err
}

Check warning on line 125 in worker/internal/benthos/transformers/email.go

Codecov / codecov/patch

worker/internal/benthos/transformers/email.go#L124-L125

Added lines #L124 - L125 were not covered by tests

tld := sliceString(faker.DomainName(), len(splitDomain[1]))
splitGeneratedDomain := strings.Split(domain, ".")

returnValue = sliceString(faker.Username(), len(parsedEmail[0])) + "@" + domain + "." + tld
// the +1 accounts for the leading @ sign that GenerateDomain includes in the generated domain, which we want to keep
domainName := transformer_utils.SliceString(splitGeneratedDomain[0], len(splitDomain[0])+1)

} else if preserveDomain && preserveLength {
tld := transformer_utils.SliceString(splitGeneratedDomain[1], len(splitDomain[1]))

// preserve domain and length of the email -> keep the domain the same but slice the username to be the same length as the input username
unLength := len(parsedEmail[0])
un, err := GenerateRandomStringWithLength(int64(len(parsedEmail[0])))
if err != nil {
return "", nil
}

Check warning on line 137 in worker/internal/benthos/transformers/email.go

Codecov / codecov/patch

worker/internal/benthos/transformers/email.go#L136-L137

Added lines #L136 - L137 were not covered by tests

un := faker.Username()
res = transformer_utils.SliceString(un, len(parsedEmail[0])) + domainName + "." + tld

returnValue = sliceString(un, unLength) + "@" + parsedEmail[1]
return res, err

} else {
// generate random email
}

returnValue = faker.Email()
// preserve domain and length of the email -> keep the domain the same but slice the username to be the same length as the input username
func GenerateEmailPreserveDomainAndLength(e string, pd, pl bool) (string, error) {

parsedEmail, err := parseEmail(e)
if err != nil {
return "", fmt.Errorf("invalid email: %s", e)

Check warning on line 150 in worker/internal/benthos/transformers/email.go

Codecov / codecov/patch

worker/internal/benthos/transformers/email.go#L150

Added line #L150 was not covered by tests
}

return returnValue, nil
unLength := len(parsedEmail[0])

un, err := GenerateRandomStringWithLength(int64(len(parsedEmail[0])))
if err != nil {
return "", err
}

Check warning on line 158 in worker/internal/benthos/transformers/email.go

Codecov / codecov/patch

worker/internal/benthos/transformers/email.go#L157-L158

Added lines #L157 - L158 were not covered by tests

res := transformer_utils.SliceString(un, unLength) + "@" + parsedEmail[1]

return res, err
}

func parseEmail(email string) ([]string, error) {
func GenerateDomain() (string, error) {

var result string

domain, err := GenerateRandomStringWithLength(6)

inputEmail, err := mail.ParseAddress(email)
if err != nil {
return "", fmt.Errorf("unable to generate random domain name")
}

Check warning on line 173 in worker/internal/benthos/transformers/email.go

Codecov / codecov/patch

worker/internal/benthos/transformers/email.go#L172-L173

Added lines #L172 - L173 were not covered by tests

return nil, fmt.Errorf("invalid email format: %s", email)
tld, err := transformer_utils.GetRandomValueFromSlice(tld)
if err != nil {
return "", err

Check warning on line 177 in worker/internal/benthos/transformers/email.go

Codecov / codecov/patch

worker/internal/benthos/transformers/email.go#L177

Added line #L177 was not covered by tests
}

parsedEmail := strings.Split(inputEmail.Address, "@")
result = "@" + domain + "." + tld

return result, err

}

func GenerateRandomUsername() (string, error) {

randLength, err := transformer_utils.GenerateRandomNumberWithBounds(3, 8)
if err != nil {
return "", err
}

Check warning on line 191 in worker/internal/benthos/transformers/email.go

Codecov / codecov/patch

worker/internal/benthos/transformers/email.go#L190-L191

Added lines #L190 - L191 were not covered by tests

username, err := GenerateRandomStringWithLength(int64(randLength))
if err != nil {
return "", err
}

Check warning on line 196 in worker/internal/benthos/transformers/email.go

Codecov / codecov/patch

worker/internal/benthos/transformers/email.go#L195-L196

Added lines #L195 - L196 were not covered by tests

return username, nil

return parsedEmail, nil
}

func sliceString(s string, l int) string {
func parseEmail(email string) ([]string, error) {

runes := []rune(s) // use runes instead of strings in order to avoid slicing a multi-byte character and returning invalid UTF-8
inputEmail, err := mail.ParseAddress(email)
if err != nil {

if l > len(runes) {
l = len(runes)
return nil, fmt.Errorf("invalid email format: %s", email)

Check warning on line 207 in worker/internal/benthos/transformers/email.go

Codecov / codecov/patch

worker/internal/benthos/transformers/email.go#L207

Added line #L207 was not covered by tests
}

return string(runes[:l])
parsedEmail := strings.Split(inputEmail.Address, "@")

return parsedEmail, nil
}
79 changes: 61 additions & 18 deletions worker/internal/benthos/transformers/email_test.go
Original file line number Diff line number Diff line change
@@ -9,49 +9,66 @@ import (
"github.com/stretchr/testify/assert"
)

func TestProcessEmailPreserveLengthTrue(t *testing.T) {
func TestGenerateEmailPreserveDomain(t *testing.T) {

email := "evia@gmail.com"
email := "evis@gmail.com"

res, err := ProcessEmail(email, true, true)
res, err := GenerateEmailPreserveDomain(email, true)

assert.NoError(t, err)
assert.Equal(t, len(res), len(email), "The length of the emails should be the same")
/* There is a very small chance that the randomly generated email address actually matches
the input email address, which is why we can't do an assert.NotEqual() but instead just have to check
that the email has the correct structure */
assert.Equal(t, true, isValidEmail(res), "true", "The domain should not explicitly be preserved but randomly generated.")
}

func TestProcessEmailPreserveLengthFalse(t *testing.T) {
email := "johndoe@gmail.com"
func TestGenerateEmailPreserveLength(t *testing.T) {
email := "evis@gmail.com"

res, err := ProcessEmail(email, false, true)
res, err := GenerateEmailPreserveLength(email, true)

assert.NoError(t, err)
assert.Equal(t, true, isValidEmail(res), "The expected email should be have a valid email structure")
assert.Equal(t, len(email), len(res), "The length of the emails should be the same")
}

func TestProcessEmailNoPreserveDomain(t *testing.T) {

email := "[email protected]"
func TestGenerateEmailPreserveLengthTruePreserveDomainTrue(t *testing.T) {
email := "[email protected]"

res, err := ProcessEmail(email, true, false)
res, err := GenerateEmailPreserveDomainAndLength(email, true, true)

assert.NoError(t, err)
/* There is a very small chance that the randomly generated email address actually matches
the input email address, which is why we can't do an assert.NotEqual() but instead just have to check
that the email has the correct structure */
assert.Equal(t, true, isValidEmail(res), "true", "The domain should not explicitly be preserved but randomly generated.")
assert.Equal(t, true, isValidEmail(res), "The expected email should be have a valid email structure")

}

func TestProcessEmailPreserveLengthFalsePreserveDomainFalse(t *testing.T) {
func TestGenerateEmailPreserveLengthFalsePreserveDomainFalse(t *testing.T) {
email := "[email protected]"

res, err := ProcessEmail(email, false, false)
res, err := GenerateEmail(email, false, false)

assert.NoError(t, err)
assert.Equal(t, true, isValidEmail(res), "The expected email should be have a valid email structure")

}

// TestGenerateDomain checks that GenerateDomain succeeds and that the value
// it returns is accepted by IsValidDomain.
func TestGenerateDomain(t *testing.T) {
	domain, err := GenerateDomain()

	assert.NoError(t, err)
	assert.Equal(t, true, IsValidDomain(domain))
}

// TestGenerateUsername checks that GenerateRandomUsername succeeds and that
// the value it returns is accepted by IsValidUsername.
func TestGenerateUsername(t *testing.T) {
	username, err := GenerateRandomUsername()

	assert.NoError(t, err)
	assert.Equal(t, true, IsValidUsername(username))
}

func TestEmailTransformer(t *testing.T) {
mapping := `root = this.emailtransformer(true, true)`
ex, err := bloblang.Parse(mapping)
@@ -72,3 +89,29 @@ func isValidEmail(email string) bool {
regex := regexp.MustCompile(emailPattern)
return regex.MatchString(email)
}

// IsValidDomain reports whether domain looks like "@<name>.<tld>": a leading
// @ sign, one or more letters, digits, dots or hyphens, then a dot followed by
// a TLD of at least two letters. It returns false if the pattern fails to
// compile.
func IsValidDomain(domain string) bool {
	// regexp.MatchString compiles the pattern and matches in one step.
	matched, err := regexp.MatchString(`^@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}$`, domain)
	return err == nil && matched
}

// IsValidUsername reports whether username is non-empty and consists solely
// of ASCII letters and digits. It returns false if the pattern fails to
// compile.
//
// NOTE(review): this assumes generated usernames are purely alphanumeric —
// confirm against GenerateRandomStringWithLength, which is not visible here.
func IsValidUsername(username string) bool {
	// Anchor both ends of the pattern; without the trailing `+$` only the
	// first character would be checked and strings like "a!!!" would pass.
	matched, err := regexp.MatchString(`^[a-zA-Z0-9]+$`, username)
	return err == nil && matched
}
57 changes: 57 additions & 0 deletions worker/internal/benthos/transformers/utils/utils.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
package transformer_utils

import (
"crypto/rand"
"errors"
"math/big"
)

// GetRandomValueFromSlice returns one element of arr chosen at random using
// crypto/rand. It returns an error if arr is empty or if the random source
// cannot be read.
func GetRandomValueFromSlice(arr []string) (string, error) {
	n := len(arr)
	if n == 0 {
		return "", errors.New("slice is empty")
	}

	// rand.Int draws from [0, n), which is exactly the range of valid indexes.
	idx, err := rand.Int(rand.Reader, big.NewInt(int64(n)))
	if err != nil {
		return "", err
	}

	return arr[idx.Int64()], nil
}

// GenerateRandomNumberWithBounds returns a random integer, drawn from
// crypto/rand, in the inclusive range [min, max].
//
// It returns an error when min is greater than max or when the random source
// cannot be read. When min equals max that value is returned directly.
func GenerateRandomNumberWithBounds(min, max int) (int, error) {
	if min > max {
		return 0, errors.New("min cannot be greater than max")
	}

	if min == max {
		return min, nil
	}

	// Compute the number of candidate values (max-min+1) with big.Int so that
	// very wide ranges cannot overflow int64. An overflowed width would be
	// <= 0, which makes rand.Int panic.
	lo := big.NewInt(int64(min))
	width := new(big.Int).Sub(big.NewInt(int64(max)), lo)
	width.Add(width, big.NewInt(1))

	// Generate a random offset in the range [0, max-min].
	offset, err := rand.Int(rand.Reader, width)
	if err != nil {
		return 0, err
	}

	// Shift the range to [min, max]; the sum is bounded by max, so it fits.
	return int(offset.Add(offset, lo).Int64()), nil
}

// SliceString returns the first l characters (runes) of s. If l exceeds the
// number of runes in s, the whole string is returned.
func SliceString(s string, l int) string {
	// Work on runes rather than bytes so a multi-byte character is never cut
	// in half, which would produce invalid UTF-8.
	chars := []rune(s)

	if l <= len(chars) {
		return string(chars[:l])
	}

	return string(chars)
}
Loading