diff --git a/plugins/extractors/bigquery/bigquery.go b/plugins/extractors/bigquery/bigquery.go index b749cdc2..289e6b64 100644 --- a/plugins/extractors/bigquery/bigquery.go +++ b/plugins/extractors/bigquery/bigquery.go @@ -2,25 +2,18 @@ package bigquery import ( "context" - "crypto/rand" _ "embed" // used to print the embedded assets "encoding/base64" "encoding/json" "fmt" "html/template" - "math/big" + "math/rand" "strings" "sync" "cloud.google.com/go/bigquery" datacatalog "cloud.google.com/go/datacatalog/apiv1" "cloud.google.com/go/datacatalog/apiv1/datacatalogpb" - "github.com/goto/meteor/models" - v1beta2 "github.com/goto/meteor/models/gotocompany/assets/v1beta2" - "github.com/goto/meteor/plugins" - "github.com/goto/meteor/plugins/extractors/bigquery/auditlog" - "github.com/goto/meteor/registry" - "github.com/goto/meteor/utils" "github.com/goto/salt/log" "github.com/pkg/errors" "google.golang.org/api/iterator" @@ -28,6 +21,13 @@ import ( "google.golang.org/protobuf/types/known/anypb" "google.golang.org/protobuf/types/known/structpb" "google.golang.org/protobuf/types/known/timestamppb" + + "github.com/goto/meteor/models" + v1beta2 "github.com/goto/meteor/models/gotocompany/assets/v1beta2" + "github.com/goto/meteor/plugins" + "github.com/goto/meteor/plugins/extractors/bigquery/auditlog" + "github.com/goto/meteor/registry" + "github.com/goto/meteor/utils" ) //go:embed README.md @@ -109,7 +109,7 @@ type Extractor struct { randFn randFn } -type randFn func(int64) (int64, error) +type randFn func(max, rndSeed int64) int64 type NewClientFunc func(ctx context.Context, logger log.Logger, config *Config) (*bigquery.Client, error) @@ -301,7 +301,7 @@ func (e *Extractor) buildAsset(ctx context.Context, t *bigquery.Table, md *bigqu var previewRows *structpb.ListValue if md.Type == bigquery.RegularTable { var err error - previewFields, previewRows, err = e.buildPreview(ctx, t) + previewFields, previewRows, err = e.buildPreview(ctx, t, md) if err != nil { e.logger.Warn("error building preview", "err", err, "table", tableFQN) } @@ -390,7 +390,7 @@ func (e *Extractor) buildColumn(ctx context.Context, field *bigquery.FieldSchema return col } -func (e *Extractor) buildPreview(ctx context.Context, t *bigquery.Table) (fields []string, rows *structpb.ListValue, err error) { +func (e *Extractor) buildPreview(ctx context.Context, t *bigquery.Table, md *bigquery.TableMetadata) (fields []string, rows *structpb.ListValue, err error) { maxPreviewRows := e.config.MaxPreviewRows if maxPreviewRows == 0 { return nil, nil, nil @@ -445,7 +445,7 @@ func (e *Extractor) buildPreview(ctx context.Context, t *bigquery.Table) (fields // if mix values true, then randomize the values if e.config.MixValues { - tempRows, err = e.mixValues(tempRows) + tempRows, err = e.mixValues(tempRows, md.LastModifiedTime.Unix()) if err != nil { return nil, nil, fmt.Errorf("mix values: %w", err) } @@ -459,7 +459,7 @@ func (e *Extractor) buildPreview(ctx context.Context, t *bigquery.Table) (fields return fields, rows, nil } -func (e *Extractor) mixValues(rows []interface{}) ([]interface{}, error) { +func (e *Extractor) mixValues(rows []interface{}, rndSeed int64) ([]interface{}, error) { numRows := len(rows) if numRows < 2 { return rows, nil @@ -473,10 +473,7 @@ func (e *Extractor) mixValues(rows []interface{}) ([]interface{}, error) { for col := 0; col < numColumns; col++ { for row := numRows - 1; row > 0; row-- { // Generate a random index within the range [0, row+1) - swapRow, err := e.randFn(int64(row + 1)) - if err != nil { - return nil, err - } + swapRow := e.randFn(int64(row+1), rndSeed) // Swap the values in the current row and the randomly selected row a, ok := mixedRows[row].([]interface{}) @@ -634,10 +631,7 @@ func init() { } } -func cryptoRandom(max int64) (int64, error) { - i, err := rand.Int(rand.Reader, big.NewInt(max)) - if err != nil { - return 0, err - } - return i.Int64(), nil +func cryptoRandom(max, rndSeed int64) int64 { + rnd := rand.New(rand.NewSource(rndSeed)) + return rnd.Int63n(max) } diff --git a/plugins/extractors/bigquery/bigquery_test.go b/plugins/extractors/bigquery/bigquery_test.go index 75bef246..337f9018 100644 --- a/plugins/extractors/bigquery/bigquery_test.go +++ b/plugins/extractors/bigquery/bigquery_test.go @@ -5,20 +5,13 @@ package bigquery_test import ( "context" - "errors" "fmt" "log" - "math/rand" "os" "testing" "time" bq "cloud.google.com/go/bigquery" - v1beta2 "github.com/goto/meteor/models/gotocompany/assets/v1beta2" - "github.com/goto/meteor/plugins" - "github.com/goto/meteor/plugins/extractors/bigquery" - "github.com/goto/meteor/test/mocks" - "github.com/goto/meteor/test/utils" slog "github.com/goto/salt/log" "github.com/nsf/jsondiff" "github.com/ory/dockertest/v3" @@ -27,6 +20,12 @@ import ( "google.golang.org/api/option" "google.golang.org/protobuf/types/known/anypb" "google.golang.org/protobuf/types/known/timestamppb" + + v1beta2 "github.com/goto/meteor/models/gotocompany/assets/v1beta2" + "github.com/goto/meteor/plugins" + "github.com/goto/meteor/plugins/extractors/bigquery" + "github.com/goto/meteor/test/mocks" + "github.com/goto/meteor/test/utils" ) const ( @@ -156,7 +155,7 @@ func TestInit(t *testing.T) { } func TestExtract(t *testing.T) { - runTest := func(t *testing.T, cfg plugins.Config, randomizer func(int64) (int64, error)) []*v1beta2.Asset { + runTest := func(t *testing.T, cfg plugins.Config, randomizer func(max, seed int64) int64) []*v1beta2.Asset { extr := bigquery.New(utils.Logger, mockClient, randomizer) ctx, cancel := context.WithTimeout(context.Background(), 1*time.Second) defer cancel() @@ -204,46 +203,26 @@ func TestExtract(t *testing.T) { }, } - randFn := func(seed int64) func(max int64) (int64, error) { - randomizer := rand.New(rand.NewSource(seed)) - - return func(max int64) (int64, error) { - return randomizer.Int63n(max), nil - } + randFn := func(max, seed int64) int64 { + return 10 } t.Run("should return preview rows with mixed values", func(t *testing.T) { - seed := int64(42) - randomizer := randFn(seed) - - actual := runTest(t, cfg, randomizer) + actual := runTest(t, cfg, randFn) utils.AssertJSONFile(t, "testdata/expected-assets-mixed.json", actual, jsondiff.FullMatch) }) t.Run("with different seed should not equal to expected", func(t *testing.T) { - seed := int64(99) - randomizer := randFn(seed) - - actual := runTest(t, cfg, randomizer) + actual := runTest(t, cfg, randFn) utils.AssertJSONFile(t, "testdata/expected-assets-mixed.json", actual, jsondiff.NoMatch) }) - t.Run("should not build preview rows when randomFn fails", func(t *testing.T) { - actual := runTest(t, cfg, func(max int64) (int64, error) { - return 0, errors.New("randomizer failed") - }) - utils.AssertJSONFile(t, "testdata/expected-assets-no-preview.json", actual, jsondiff.FullMatch) - }) - t.Run("should not randomize if rows < 2", func(t *testing.T) { - seed := int64(42) - randomizer := randFn(seed) - newCfg := cfg newCfg.RawConfig["max_preview_rows"] = "1" - actual := runTest(t, newCfg, randomizer) + actual := runTest(t, newCfg, randFn) utils.AssertJSONFile(t, "testdata/expected-assets.json", actual, jsondiff.FullMatch) }) })