Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

NEOS-251: refactor email transformer and take our faker lib #475

Merged
merged 2 commits into from
Nov 1, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
171 changes: 137 additions & 34 deletions worker/internal/benthos/transformers/email.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,9 +7,11 @@ import (

"github.com/benthosdev/benthos/v4/public/bloblang"
_ "github.com/benthosdev/benthos/v4/public/components/io"
"github.com/bxcodec/faker/v4"
transformer_utils "github.com/nucleuscloud/neosync/worker/internal/benthos/transformers/utils"
)

var tld = []string{"com", "org", "net", "edu", "gov", "app", "dev"}

func init() {

spec := bloblang.NewPluginSpec().
Expand All @@ -29,8 +31,9 @@ func init() {
return nil, err
}

return bloblang.StringMethod(func(s string) (any, error) {
res, err := ProcessEmail(s, preserveLength, preserveDomain)
return bloblang.StringMethod(func(e string) (any, error) {

res, err := GenerateEmail(e, preserveLength, preserveDomain)
return res, err
}), nil
})
Expand All @@ -42,69 +45,169 @@ func init() {
}

// main transformer logic goes here
func ProcessEmail(email string, preserveLength, preserveDomain bool) (string, error) {
func GenerateEmail(e string, preserveLength, preserveDomain bool) (string, error) {

var returnValue string
var err error

if !preserveLength && preserveDomain {

parsedEmail, err := parseEmail(email)
returnValue, err = GenerateEmailPreserveDomain(e, true)
if err != nil {
return "", err
}

} else if preserveLength && !preserveDomain {

returnValue, err = GenerateEmailPreserveLength(e, true)
if err != nil {
return "", err
}

} else if preserveLength && preserveDomain {

returnValue, err = GenerateEmailPreserveDomainAndLength(e, true, true)
if err != nil {
return "", err
}

} else {

un, err := GenerateRandomUsername()
if err != nil {
return "", nil
}

domain, err := GenerateDomain()
if err != nil {
return "", nil
}

// generate random email
returnValue = un + domain
}

return returnValue, nil
}

// Generate a random email and preserve the input email's domain
func GenerateEmailPreserveDomain(e string, pd bool) (string, error) {

parsedEmail, err := parseEmail(e)
if err != nil {
return "", fmt.Errorf("invalid email: %s", email)
return "", fmt.Errorf("invalid email: %s", e)
}

var returnValue string
un, err := GenerateRandomUsername()
if err != nil {
return "", nil
}

if preserveDomain && !preserveLength {
return strings.ToLower(un) + "@" + parsedEmail[1], err
}

returnValue = strings.ToLower(faker.Username()) + "@" + parsedEmail[1]
// Preserve the length of email but not the domain name
func GenerateEmailPreserveLength(e string, pl bool) (string, error) {

} else if preserveLength && !preserveDomain {
var res string

// preserve length of email but not the domain
parsedEmail, err := parseEmail(e)
if err != nil {
return "", fmt.Errorf("invalid email: %s", e)
}

splitDomain := strings.Split(parsedEmail[1], ".") // split the domain to account for different domain name lengths
// split the domain to account for different domain name lengths
splitDomain := strings.Split(parsedEmail[1], ".")

domain := sliceString(faker.DomainName(), len(splitDomain[0]))
domain, err := GenerateDomain()
if err != nil {
return "", err
}

tld := sliceString(faker.DomainName(), len(splitDomain[1]))
splitGeneratedDomain := strings.Split(domain, ".")

returnValue = sliceString(faker.Username(), len(parsedEmail[0])) + "@" + domain + "." + tld
// the +1 accounts for the leading @ sign that GenerateDomain includes in the generated domain, which we want to keep
domainName := transformer_utils.SliceString(splitGeneratedDomain[0], len(splitDomain[0])+1)

} else if preserveDomain && preserveLength {
tld := transformer_utils.SliceString(splitGeneratedDomain[1], len(splitDomain[1]))

// preserve domain and length of the email -> keep the domain the same but slice the username to be the same length as the input username
unLength := len(parsedEmail[0])
un, err := GenerateRandomStringWithLength(int64(len(parsedEmail[0])))
if err != nil {
return "", nil
}

un := faker.Username()
res = transformer_utils.SliceString(un, len(parsedEmail[0])) + domainName + "." + tld

returnValue = sliceString(un, unLength) + "@" + parsedEmail[1]
return res, err

} else {
// generate random email
}

returnValue = faker.Email()
// preserve domain and length of the email -> keep the domain the same but slice the username to be the same length as the input username
func GenerateEmailPreserveDomainAndLength(e string, pd, pl bool) (string, error) {

parsedEmail, err := parseEmail(e)
if err != nil {
return "", fmt.Errorf("invalid email: %s", e)
}

return returnValue, nil
unLength := len(parsedEmail[0])

un, err := GenerateRandomStringWithLength(int64(len(parsedEmail[0])))
if err != nil {
return "", err
}

res := transformer_utils.SliceString(un, unLength) + "@" + parsedEmail[1]

return res, err
}

func parseEmail(email string) ([]string, error) {
func GenerateDomain() (string, error) {

var result string

domain, err := GenerateRandomStringWithLength(6)

inputEmail, err := mail.ParseAddress(email)
if err != nil {
return "", fmt.Errorf("unable to generate random domain name")
}

return nil, fmt.Errorf("invalid email format: %s", email)
tld, err := transformer_utils.GetRandomValueFromSlice(tld)
if err != nil {
return "", err
}

parsedEmail := strings.Split(inputEmail.Address, "@")
result = "@" + domain + "." + tld

return result, err

}

func GenerateRandomUsername() (string, error) {

randLength, err := transformer_utils.GenerateRandomNumberWithBounds(3, 8)
if err != nil {
return "", err
}

username, err := GenerateRandomStringWithLength(int64(randLength))
if err != nil {
return "", err
}

return username, nil

return parsedEmail, nil
}

func sliceString(s string, l int) string {
func parseEmail(email string) ([]string, error) {

runes := []rune(s) // use runes instead of strings in order to avoid slicing a multi-byte character and returning invalid UTF-8
inputEmail, err := mail.ParseAddress(email)
if err != nil {

if l > len(runes) {
l = len(runes)
return nil, fmt.Errorf("invalid email format: %s", email)
}

return string(runes[:l])
parsedEmail := strings.Split(inputEmail.Address, "@")

return parsedEmail, nil
}
79 changes: 61 additions & 18 deletions worker/internal/benthos/transformers/email_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,49 +9,66 @@ import (
"github.com/stretchr/testify/assert"
)

func TestProcessEmailPreserveLengthTrue(t *testing.T) {
func TestGenerateEmailPreserveDomain(t *testing.T) {

email := "evia@gmail.com"
email := "evis@gmail.com"

res, err := ProcessEmail(email, true, true)
res, err := GenerateEmailPreserveDomain(email, true)

assert.NoError(t, err)
assert.Equal(t, len(res), len(email), "The length of the emails should be the same")
/* There is a very small chance that the randomly generated email address actually matches
the input email address, which is why we can't use assert.NotEqual() and instead just check
that the email has the correct structure */
assert.Equal(t, true, isValidEmail(res), "true", "The domain should not explicitly be preserved but randomly generated.")
}

func TestProcessEmailPreserveLengthFalse(t *testing.T) {
email := "johndoe@gmail.com"
func TestGenerateEmailPreserveLength(t *testing.T) {
email := "evis@gmail.com"

res, err := ProcessEmail(email, false, true)
res, err := GenerateEmailPreserveLength(email, true)

assert.NoError(t, err)
assert.Equal(t, true, isValidEmail(res), "The expected email should be have a valid email structure")
assert.Equal(t, len(email), len(res), "The length of the emails should be the same")
}

func TestProcessEmailNoPreserveDomain(t *testing.T) {

email := "[email protected]"
func TestGenerateEmailPreserveLengthTruePreserveDomainTrue(t *testing.T) {
email := "[email protected]"

res, err := ProcessEmail(email, true, false)
res, err := GenerateEmailPreserveDomainAndLength(email, true, true)

assert.NoError(t, err)
/* There is a very small chance that the randomly generated email address actually matches
the input email address, which is why we can't use assert.NotEqual() and instead just check
that the email has the correct structure */
assert.Equal(t, true, isValidEmail(res), "true", "The domain should not explicitly be preserved but randomly generated.")
assert.Equal(t, true, isValidEmail(res), "The expected email should be have a valid email structure")

}

func TestProcessEmailPreserveLengthFalsePreserveDomainFalse(t *testing.T) {
func TestGenerateEmailPreserveLengthFalsePreserveDomainFalse(t *testing.T) {
email := "[email protected]"

res, err := ProcessEmail(email, false, false)
res, err := GenerateEmail(email, false, false)

assert.NoError(t, err)
assert.Equal(t, true, isValidEmail(res), "The expected email should be have a valid email structure")

}

// TestGenerateDomain checks that GenerateDomain succeeds and that its output
// is accepted by IsValidDomain.
func TestGenerateDomain(t *testing.T) {
	domain, err := GenerateDomain()
	assert.NoError(t, err)

	// The generated value is random, so only its shape can be asserted.
	valid := IsValidDomain(domain)
	assert.Equal(t, true, valid)
}

// TestGenerateUsername checks that GenerateRandomUsername succeeds and that
// its output is accepted by IsValidUsername.
func TestGenerateUsername(t *testing.T) {
	username, err := GenerateRandomUsername()
	assert.NoError(t, err)

	// The generated value is random, so only its shape can be asserted.
	valid := IsValidUsername(username)
	assert.Equal(t, true, valid)
}

func TestEmailTransformer(t *testing.T) {
mapping := `root = this.emailtransformer(true, true)`
ex, err := bloblang.Parse(mapping)
Expand All @@ -72,3 +89,29 @@ func isValidEmail(email string) bool {
regex := regexp.MustCompile(emailPattern)
return regex.MatchString(email)
}

// IsValidDomain reports whether domain has the shape "@<name>.<tld>": a
// leading @, one or more letters, digits, dots or hyphens, a dot, and a
// trailing run of at least two letters.
func IsValidDomain(domain string) bool {
	const domainPattern = `^@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}$`

	// A pattern that fails to compile is treated as "not valid".
	matched, err := regexp.MatchString(domainPattern, domain)
	return err == nil && matched
}

// IsValidUsername reports whether the given string consists solely of one or
// more ASCII letters or digits.
//
// The parameter is named domain, but it carries the username under test.
// NOTE(review): this assumes generated usernames are purely alphanumeric;
// confirm against the generator before relying on it for other inputs.
func IsValidUsername(domain string) bool {
	// Anchor both ends and require one or more characters. With only a leading
	// ^ and a single character class, just the first character was checked, so
	// values such as "a!!!" were accepted.
	pattern := `^[a-zA-Z0-9]+$`

	re, err := regexp.Compile(pattern)
	if err != nil {
		// A pattern that fails to compile is treated as "not valid".
		return false
	}

	return re.MatchString(domain)
}
57 changes: 57 additions & 0 deletions worker/internal/benthos/transformers/utils/utils.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
package transformer_utils

import (
"crypto/rand"
"errors"
"math/big"
)

// GetRandomValueFromSlice returns one element of arr, selected by an index
// drawn from crypto/rand. It returns an error when arr is empty or when the
// random source fails.
func GetRandomValueFromSlice(arr []string) (string, error) {
	n := len(arr)
	if n == 0 {
		return "", errors.New("slice is empty")
	}

	// rand.Int yields a value in [0, n), i.e. a valid index into arr.
	upperBound := big.NewInt(int64(n))
	idx, err := rand.Int(rand.Reader, upperBound)
	if err != nil {
		return "", err
	}

	return arr[idx.Int64()], nil
}

// GenerateRandomNumberWithBounds returns a random integer in the inclusive
// range [min, max], drawn from crypto/rand.
//
// It returns an error when min is greater than max or when the random source
// fails. When min equals max that value is returned directly.
func GenerateRandomNumberWithBounds(min, max int) (int, error) {
	if min > max {
		return 0, errors.New("min cannot be greater than max")
	}

	if min == max {
		return min, nil
	}

	lower := big.NewInt(int64(min))

	// span = max - min + 1. It is computed with big.Int because the int64
	// expression overflows for wide ranges (e.g. min=0, max=MaxInt), which
	// would hand rand.Int a non-positive bound and make it panic.
	span := new(big.Int).Sub(big.NewInt(int64(max)), lower)
	span.Add(span, big.NewInt(1))

	// offset is in [0, max-min].
	offset, err := rand.Int(rand.Reader, span)
	if err != nil {
		return 0, err
	}

	// Shift the range to [min, max]; the sum is bounded by max, so it fits.
	return int(offset.Add(offset, lower).Int64()), nil
}

// SliceString returns the first l runes of s.
//
// If l exceeds the number of runes in s, the whole string is returned. If l is
// zero or negative, the empty string is returned (a negative l previously
// caused a slice-bounds panic).
func SliceString(s string, l int) string {
	if l <= 0 {
		return ""
	}

	// Slice by rune rather than by byte so a multi-byte character is never cut
	// in half, which would produce invalid UTF-8.
	runes := []rune(s)

	if l > len(runes) {
		l = len(runes)
	}

	return string(runes[:l])
}
Loading
Loading