From 79149d32b95dca096b5ff85584c5f45dc677ad27 Mon Sep 17 00:00:00 2001
From: achettyiitr
Date: Sun, 25 Aug 2024 03:15:39 +0530
Subject: [PATCH] chore: refactor warehouse integration tests

---
 .github/workflows/tests.yaml                  |    2 +-
 .../backendconfigtest/destination_builder.go  |    7 +
 .../azure-synapse/azure_synapse_test.go       |  336 ++--
 .../azure-synapse/testdata/template.json      |  132 --
 .../clickhouse/clickhouse_test.go             | 1451 ++++++++---------
 .../clickhouse/testdata/template.json         |  261 ---
 .../integrations/datalake/datalake_test.go    |  583 ++++---
 ...r-compose.yml => docker-compose.azure.yml} |    6 -
 .../datalake/testdata/docker-compose.gcs.yml  |    8 +
 .../docker-compose.hive-metastore.yml         |   18 +
 .../testdata/docker-compose.trino.yml         |   17 -
 .../datalake/testdata/spark.staging.json      |    8 +
 .../datalake/testdata/template.json           |  327 ----
 .../datalake/testdata/trino.staging.json      |    8 +
 .../integrations/deltalake/deltalake_test.go  |  399 +++--
 .../deltalake/testdata/template.json          |  142 --
 warehouse/integrations/mssql/mssql_test.go    |  318 ++--
 .../integrations/mssql/testdata/template.json |  275 ----
 .../integrations/postgres/postgres_test.go    |  571 ++++---
 .../postgres/testdata/template.json           |  408 -----
 .../integrations/redshift/redshift_test.go    |  567 +++---
 .../redshift/testdata/template.json           |  385 -----
 .../integrations/snowflake/snowflake_test.go  |  820 ++++------
 .../snowflake/testdata/template.json          |  782 ---------
 warehouse/integrations/testhelper/service.go  |   84 +
 warehouse/integrations/testhelper/setup.go    |   34 -
 26 files changed, 2572 insertions(+), 5377 deletions(-)
 delete mode 100644 warehouse/integrations/azure-synapse/testdata/template.json
 delete mode 100644 warehouse/integrations/clickhouse/testdata/template.json
 rename warehouse/integrations/datalake/testdata/{docker-compose.yml => docker-compose.azure.yml} (67%)
 create mode 100644 warehouse/integrations/datalake/testdata/docker-compose.gcs.yml
 create mode 100644 warehouse/integrations/datalake/testdata/docker-compose.hive-metastore.yml
 create mode 100644 warehouse/integrations/datalake/testdata/spark.staging.json
 delete mode 100644 warehouse/integrations/datalake/testdata/template.json
 create mode 100644 warehouse/integrations/datalake/testdata/trino.staging.json
 delete mode 100644 warehouse/integrations/deltalake/testdata/template.json
 delete mode 100644 warehouse/integrations/mssql/testdata/template.json
 delete mode 100644 warehouse/integrations/postgres/testdata/template.json
 delete mode 100644 warehouse/integrations/redshift/testdata/template.json
 delete mode 100644 warehouse/integrations/snowflake/testdata/template.json
 create mode 100644 warehouse/integrations/testhelper/service.go

diff --git a/.github/workflows/tests.yaml b/.github/workflows/tests.yaml
index 02d424b70ce..97365ab1f83 100644
--- a/.github/workflows/tests.yaml
+++ b/.github/workflows/tests.yaml
@@ -83,7 +83,7 @@ jobs:
           echo "AWS_ACCESS_KEY_ID=${{ secrets.AWS_REDSHIFT_GINKGO_ACCESS_KEY_ID }}" >> $GITHUB_ENV
           echo "AWS_SECRET_ACCESS_KEY=${{ secrets.AWS_REDSHIFT_GINKGO_SECRET_ACCESS_KEY }}" >> $GITHUB_ENV
       - name: Warehouse Service Integration [ ${{ matrix.destination }} ]
-        run: make test-warehouse package=${{ matrix.package }}
+        run: FORCE_RUN_INTEGRATION_TESTS=true make test-warehouse package=${{ matrix.package }}
         env:
           BIGQUERY_INTEGRATION_TEST_CREDENTIALS: ${{ secrets.BIGQUERY_INTEGRATION_TEST_CREDENTIALS }}
           DATABRICKS_INTEGRATION_TEST_CREDENTIALS: ${{ secrets.DATABRICKS_INTEGRATION_TEST_CREDENTIALS }}
diff --git a/testhelper/backendconfigtest/destination_builder.go
b/testhelper/backendconfigtest/destination_builder.go index e93d06796e9..2f896cc7c80 100644 --- a/testhelper/backendconfigtest/destination_builder.go +++ b/testhelper/backendconfigtest/destination_builder.go @@ -2,6 +2,7 @@ package backendconfigtest import ( "github.com/rudderlabs/rudder-go-kit/testhelper/rand" + backendconfig "github.com/rudderlabs/rudder-server/backend-config" ) @@ -34,6 +35,12 @@ func (b *DestinationBuilder) WithID(id string) *DestinationBuilder { return b } +// WithRevisionID sets the revision ID of the destination +func (b *DestinationBuilder) WithRevisionID(revisionID string) *DestinationBuilder { + b.v.RevisionID = revisionID + return b +} + // WithConfigOption sets a config option for the destination func (b *DestinationBuilder) WithConfigOption(key string, value any) *DestinationBuilder { b.v.Config[key] = value diff --git a/warehouse/integrations/azure-synapse/azure_synapse_test.go b/warehouse/integrations/azure-synapse/azure_synapse_test.go index f4968622f15..135b26b2518 100644 --- a/warehouse/integrations/azure-synapse/azure_synapse_test.go +++ b/warehouse/integrations/azure-synapse/azure_synapse_test.go @@ -17,28 +17,27 @@ import ( "github.com/rudderlabs/rudder-go-kit/config" "github.com/rudderlabs/rudder-go-kit/filemanager" "github.com/rudderlabs/rudder-go-kit/logger" + + "github.com/rudderlabs/rudder-server/testhelper/backendconfigtest" azuresynapse "github.com/rudderlabs/rudder-server/warehouse/integrations/azure-synapse" "github.com/rudderlabs/rudder-server/warehouse/integrations/middleware/sqlquerywrapper" mockuploader "github.com/rudderlabs/rudder-server/warehouse/internal/mocks/utils" "github.com/rudderlabs/rudder-server/warehouse/internal/model" + "github.com/rudderlabs/rudder-server/warehouse/validations" "github.com/rudderlabs/compose-test/compose" - "github.com/rudderlabs/rudder-server/testhelper/workspaceConfig" - "github.com/DATA-DOG/go-sqlmock" "github.com/stretchr/testify/require" "github.com/rudderlabs/compose-test/testcompose" kithelper "github.com/rudderlabs/rudder-go-kit/testhelper" + backendconfig "github.com/rudderlabs/rudder-server/backend-config" - "github.com/rudderlabs/rudder-server/runner" - "github.com/rudderlabs/rudder-server/testhelper/health" "github.com/rudderlabs/rudder-server/utils/misc" "github.com/rudderlabs/rudder-server/warehouse/client" - "github.com/rudderlabs/rudder-server/warehouse/integrations/testhelper" - warehouseutils "github.com/rudderlabs/rudder-server/warehouse/utils" - "github.com/rudderlabs/rudder-server/warehouse/validations" + whth "github.com/rudderlabs/rudder-server/warehouse/integrations/testhelper" + whutils "github.com/rudderlabs/rudder-server/warehouse/utils" ) func TestIntegration(t *testing.T) { @@ -46,129 +45,99 @@ func TestIntegration(t *testing.T) { t.Skip("Skipping tests. 
Add 'SLOW=1' env var to run test.") } - c := testcompose.New(t, compose.FilePaths([]string{ - "testdata/docker-compose.yml", - "../testdata/docker-compose.jobsdb.yml", - "../testdata/docker-compose.minio.yml", - })) - c.Start(context.Background()) - misc.Init() validations.Init() - warehouseutils.Init() - - jobsDBPort := c.Port("jobsDb", 5432) - minioPort := c.Port("minio", 9000) - azureSynapsePort := c.Port("azure_synapse", 1433) - - httpPort, err := kithelper.GetFreePort() - require.NoError(t, err) - - workspaceID := warehouseutils.RandHex() - sourceID := warehouseutils.RandHex() - destinationID := warehouseutils.RandHex() - writeKey := warehouseutils.RandHex() - - destType := warehouseutils.AzureSynapse + whutils.Init() - namespace := testhelper.RandSchema(destType) + destType := whutils.AzureSynapse host := "localhost" database := "master" user := "SA" password := "reallyStrongPwd123" - bucketName := "testbucket" accessKeyID := "MYACCESSKEY" secretAccessKey := "MYSECRETKEY" region := "us-east-1" - minioEndpoint := fmt.Sprintf("localhost:%d", minioPort) - - templateConfigurations := map[string]any{ - "workspaceID": workspaceID, - "sourceID": sourceID, - "destinationID": destinationID, - "writeKey": writeKey, - "host": host, - "database": database, - "user": user, - "password": password, - "port": strconv.Itoa(azureSynapsePort), - "namespace": namespace, - "bucketName": bucketName, - "accessKeyID": accessKeyID, - "secretAccessKey": secretAccessKey, - "endPoint": minioEndpoint, - } - workspaceConfigPath := workspaceConfig.CreateTempFile(t, "testdata/template.json", templateConfigurations) - - testhelper.EnhanceWithDefaultEnvs(t) - t.Setenv("JOBS_DB_PORT", strconv.Itoa(jobsDBPort)) - t.Setenv("WAREHOUSE_JOBS_DB_PORT", strconv.Itoa(jobsDBPort)) - t.Setenv("MINIO_ACCESS_KEY_ID", accessKeyID) - t.Setenv("MINIO_SECRET_ACCESS_KEY", secretAccessKey) - t.Setenv("MINIO_MINIO_ENDPOINT", minioEndpoint) - t.Setenv("MINIO_SSL", "false") - t.Setenv("RSERVER_WAREHOUSE_WEB_PORT", strconv.Itoa(httpPort)) - t.Setenv("RSERVER_BACKEND_CONFIG_CONFIG_JSONPATH", workspaceConfigPath) - - svcDone := make(chan struct{}) - - ctx, cancel := context.WithCancel(context.Background()) - defer cancel() + t.Run("Events flow", func(t *testing.T) { + httpPort, err := kithelper.GetFreePort() + require.NoError(t, err) - go func() { - r := runner.New(runner.ReleaseInfo{}) - _ = r.Run(ctx, []string{"azure-synapse-integration-test"}) + c := testcompose.New(t, compose.FilePaths([]string{"testdata/docker-compose.yml", "../testdata/docker-compose.jobsdb.yml", "../testdata/docker-compose.minio.yml"})) + c.Start(context.Background()) - close(svcDone) - }() - t.Cleanup(func() { <-svcDone }) + workspaceID := whutils.RandHex() + jobsDBPort := c.Port("jobsDb", 5432) + azureSynapsePort := c.Port("azure_synapse", 1433) + minioEndpoint := fmt.Sprintf("localhost:%d", c.Port("minio", 9000)) - serviceHealthEndpoint := fmt.Sprintf("http://localhost:%d/health", httpPort) - health.WaitUntilReady(ctx, t, serviceHealthEndpoint, time.Minute, time.Second, "serviceHealthEndpoint") - - t.Run("Events flow", func(t *testing.T) { - t.Setenv("RSERVER_WAREHOUSE_AZURE_SYNAPSE_MAX_PARALLEL_LOADS", "8") - t.Setenv("RSERVER_WAREHOUSE_AZURE_SYNAPSE_SLOW_QUERY_THRESHOLD", "0s") - - jobsDB := testhelper.JobsDB(t, jobsDBPort) - - dsn := fmt.Sprintf("sqlserver://%s:%s@%s:%d?TrustServerCertificate=true&database=%s&encrypt=disable", - user, - password, - host, - azureSynapsePort, - database, - ) - db, err := sql.Open("sqlserver", dsn) - require.NoError(t, err) - 
require.NoError(t, db.Ping()) + jobsDB := whth.JobsDB(t, jobsDBPort) testcase := []struct { - name string - writeKey string - schema string - sourceID string - destinationID string - tables []string + name string + tables []string }{ { - name: "Upload Job", - writeKey: writeKey, - schema: namespace, - tables: []string{"identifies", "users", "tracks", "product_track", "pages", "screens", "aliases", "groups"}, - sourceID: sourceID, - destinationID: destinationID, + name: "Upload Job", + tables: []string{"identifies", "users", "tracks", "product_track", "pages", "screens", "aliases", "groups"}, }, } for _, tc := range testcase { - tc := tc - t.Run(tc.name, func(t *testing.T) { - t.Parallel() + var ( + sourceID = whutils.RandHex() + destinationID = whutils.RandHex() + writeKey = whutils.RandHex() + namespace = whth.RandSchema(destType) + ) + + destinationBuilder := backendconfigtest.NewDestinationBuilder(destType). + WithID(destinationID). + WithRevisionID(destinationID). + WithConfigOption("host", host). + WithConfigOption("database", database). + WithConfigOption("user", user). + WithConfigOption("password", password). + WithConfigOption("port", strconv.Itoa(azureSynapsePort)). + WithConfigOption("sslMode", "disable"). + WithConfigOption("namespace", namespace). + WithConfigOption("bucketProvider", whutils.MINIO). + WithConfigOption("bucketName", bucketName). + WithConfigOption("accessKeyID", accessKeyID). + WithConfigOption("secretAccessKey", secretAccessKey). + WithConfigOption("useSSL", false). + WithConfigOption("endPoint", minioEndpoint). + WithConfigOption("useRudderStorage", false). + WithConfigOption("syncFrequency", "30") + + workspaceConfig := backendconfigtest.NewConfigBuilder(). + WithSource( + backendconfigtest.NewSourceBuilder(). + WithID(sourceID). + WithWriteKey(writeKey). + WithWorkspaceID(workspaceID). + WithConnection(destinationBuilder.Build()). + Build(), + ). + WithWorkspaceID(workspaceID). 
+ Build() + + t.Setenv("RSERVER_WAREHOUSE_AZURE_SYNAPSE_MAX_PARALLEL_LOADS", "8") + t.Setenv("RSERVER_WAREHOUSE_AZURE_SYNAPSE_SLOW_QUERY_THRESHOLD", "0s") + + whth.BootstrapSvc(t, workspaceConfig, httpPort, jobsDBPort) + + dsn := fmt.Sprintf("sqlserver://%s:%s@%s:%d?TrustServerCertificate=true&database=%s&encrypt=disable", + user, password, host, azureSynapsePort, database, + ) + db, err := sql.Open("sqlserver", dsn) + require.NoError(t, err) + require.NoError(t, db.Ping()) + t.Cleanup(func() { + _ = db.Close() + }) sqlClient := &client.Client{ SQL: db, @@ -176,7 +145,7 @@ func TestIntegration(t *testing.T) { } conf := map[string]any{ - "bucketProvider": "MINIO", + "bucketProvider": whutils.MINIO, "bucketName": bucketName, "accessKeyID": accessKeyID, "secretAccessKey": secretAccessKey, @@ -186,12 +155,12 @@ func TestIntegration(t *testing.T) { } t.Log("verifying test case 1") - ts1 := testhelper.TestConfig{ - WriteKey: tc.writeKey, - Schema: tc.schema, + ts1 := whth.TestConfig{ + WriteKey: writeKey, + Schema: namespace, Tables: tc.tables, - SourceID: tc.sourceID, - DestinationID: tc.destinationID, + SourceID: sourceID, + DestinationID: destinationID, Config: conf, WorkspaceID: workspaceID, DestinationType: destType, @@ -201,17 +170,17 @@ func TestIntegration(t *testing.T) { JobRunID: misc.FastUUID().String(), TaskRunID: misc.FastUUID().String(), StagingFilePath: "testdata/upload-job.staging-1.json", - UserID: testhelper.GetUserId(destType), + UserID: whth.GetUserId(destType), } ts1.VerifyEvents(t) t.Log("verifying test case 2") - ts2 := testhelper.TestConfig{ - WriteKey: tc.writeKey, - Schema: tc.schema, + ts2 := whth.TestConfig{ + WriteKey: writeKey, + Schema: namespace, Tables: tc.tables, - SourceID: tc.sourceID, - DestinationID: tc.destinationID, + SourceID: sourceID, + DestinationID: destinationID, Config: conf, WorkspaceID: workspaceID, DestinationType: destType, @@ -221,7 +190,7 @@ func TestIntegration(t *testing.T) { JobRunID: misc.FastUUID().String(), TaskRunID: misc.FastUUID().String(), StagingFilePath: "testdata/upload-job.staging-2.json", - UserID: testhelper.GetUserId(destType), + UserID: whth.GetUserId(destType), } ts2.VerifyEvents(t) }) @@ -229,8 +198,16 @@ func TestIntegration(t *testing.T) { }) t.Run("Validations", func(t *testing.T) { + c := testcompose.New(t, compose.FilePaths([]string{"testdata/docker-compose.yml", "../testdata/docker-compose.minio.yml"})) + c.Start(context.Background()) + + azureSynapsePort := c.Port("azure_synapse", 1433) + minioEndpoint := fmt.Sprintf("localhost:%d", c.Port("minio", 9000)) + + namespace := whth.RandSchema(destType) + dest := backendconfig.DestinationT{ - ID: destinationID, + ID: "test_destination_id", Config: map[string]any{ "host": host, "database": database, @@ -238,8 +215,8 @@ func TestIntegration(t *testing.T) { "password": password, "port": strconv.Itoa(azureSynapsePort), "sslMode": "disable", - "namespace": "", - "bucketProvider": "MINIO", + "namespace": namespace, + "bucketProvider": whutils.MINIO, "bucketName": bucketName, "accessKeyID": accessKeyID, "secretAccessKey": secretAccessKey, @@ -255,19 +232,21 @@ func TestIntegration(t *testing.T) { }, Name: "azure-synapse-demo", Enabled: true, - RevisionID: destinationID, + RevisionID: "test_destination_id", } - testhelper.VerifyConfigurationTest(t, dest) + + whth.VerifyConfigurationTest(t, dest) }) t.Run("Load Table", func(t *testing.T) { - const ( - sourceID = "test_source_id" - destinationID = "test_destination_id" - workspaceID = "test_workspace_id" - ) + c := 
testcompose.New(t, compose.FilePaths([]string{"testdata/docker-compose.yml", "../testdata/docker-compose.minio.yml"})) + c.Start(context.Background()) + + azureSynapsePort := c.Port("azure_synapse", 1433) + minioEndpoint := fmt.Sprintf("localhost:%d", c.Port("minio", 9000)) - namespace := testhelper.RandSchema(destType) + ctx := context.Background() + namespace := whth.RandSchema(destType) schemaInUpload := model.TableSchema{ "test_bool": "boolean", @@ -295,10 +274,10 @@ func TestIntegration(t *testing.T) { warehouse := model.Warehouse{ Source: backendconfig.SourceT{ - ID: sourceID, + ID: "test_source_id", }, Destination: backendconfig.DestinationT{ - ID: destinationID, + ID: "test_destination_id", DestinationDefinition: backendconfig.DestinationDefinitionT{ Name: destType, }, @@ -310,7 +289,7 @@ func TestIntegration(t *testing.T) { "port": strconv.Itoa(azureSynapsePort), "sslMode": "disable", "namespace": "", - "bucketProvider": "MINIO", + "bucketProvider": whutils.MINIO, "bucketName": bucketName, "accessKeyID": accessKeyID, "secretAccessKey": secretAccessKey, @@ -320,12 +299,12 @@ func TestIntegration(t *testing.T) { "useRudderStorage": false, }, }, - WorkspaceID: workspaceID, + WorkspaceID: "test_workspace_id", Namespace: namespace, } fm, err := filemanager.New(&filemanager.Settings{ - Provider: warehouseutils.MINIO, + Provider: whutils.MINIO, Config: map[string]any{ "bucketName": bucketName, "accessKeyID": accessKeyID, @@ -336,7 +315,7 @@ func TestIntegration(t *testing.T) { "disableSSL": true, "region": region, "enableSSE": false, - "bucketProvider": warehouseutils.MINIO, + "bucketProvider": whutils.MINIO, }, }) require.NoError(t, err) @@ -344,9 +323,9 @@ func TestIntegration(t *testing.T) { t.Run("schema does not exists", func(t *testing.T) { tableName := "schema_not_exists_test_table" - uploadOutput := testhelper.UploadLoadFile(t, fm, "../testdata/load.csv.gz", tableName) + uploadOutput := whth.UploadLoadFile(t, fm, "../testdata/load.csv.gz", tableName) - loadFiles := []warehouseutils.LoadFile{{Location: uploadOutput.Location}} + loadFiles := []whutils.LoadFile{{Location: uploadOutput.Location}} mockUploader := newMockUploader(t, loadFiles, tableName, schemaInUpload, schemaInWarehouse) az := azuresynapse.New(config.New(), logger.NOP, stats.NOP) @@ -360,9 +339,9 @@ func TestIntegration(t *testing.T) { t.Run("table does not exists", func(t *testing.T) { tableName := "table_not_exists_test_table" - uploadOutput := testhelper.UploadLoadFile(t, fm, "../testdata/load.csv.gz", tableName) + uploadOutput := whth.UploadLoadFile(t, fm, "../testdata/load.csv.gz", tableName) - loadFiles := []warehouseutils.LoadFile{{Location: uploadOutput.Location}} + loadFiles := []whutils.LoadFile{{Location: uploadOutput.Location}} mockUploader := newMockUploader(t, loadFiles, tableName, schemaInUpload, schemaInWarehouse) az := azuresynapse.New(config.New(), logger.NOP, stats.NOP) @@ -380,9 +359,9 @@ func TestIntegration(t *testing.T) { tableName := "merge_test_table" t.Run("without dedup", func(t *testing.T) { - uploadOutput := testhelper.UploadLoadFile(t, fm, "../testdata/load.csv.gz", tableName) + uploadOutput := whth.UploadLoadFile(t, fm, "../testdata/load.csv.gz", tableName) - loadFiles := []warehouseutils.LoadFile{{Location: uploadOutput.Location}} + loadFiles := []whutils.LoadFile{{Location: uploadOutput.Location}} mockUploader := newMockUploader(t, loadFiles, tableName, schemaInUpload, schemaInWarehouse) az := azuresynapse.New(config.New(), logger.NOP, stats.NOP) @@ -405,7 +384,7 @@ func 
TestIntegration(t *testing.T) { require.Equal(t, loadTableStat.RowsInserted, int64(0)) require.Equal(t, loadTableStat.RowsUpdated, int64(14)) - records := testhelper.RetrieveRecordsFromWarehouse(t, az.DB.DB, + records := whth.RetrieveRecordsFromWarehouse(t, az.DB.DB, fmt.Sprintf(` SELECT id, @@ -424,12 +403,12 @@ func TestIntegration(t *testing.T) { tableName, ), ) - require.Equal(t, records, testhelper.SampleTestRecords()) + require.Equal(t, records, whth.SampleTestRecords()) }) t.Run("with dedup", func(t *testing.T) { - uploadOutput := testhelper.UploadLoadFile(t, fm, "../testdata/dedup.csv.gz", tableName) + uploadOutput := whth.UploadLoadFile(t, fm, "../testdata/dedup.csv.gz", tableName) - loadFiles := []warehouseutils.LoadFile{{Location: uploadOutput.Location}} + loadFiles := []whutils.LoadFile{{Location: uploadOutput.Location}} mockUploader := newMockUploader(t, loadFiles, tableName, schemaInUpload, schemaInWarehouse) az := azuresynapse.New(config.New(), logger.NOP, stats.NOP) @@ -447,7 +426,7 @@ func TestIntegration(t *testing.T) { require.Equal(t, loadTableStat.RowsInserted, int64(0)) require.Equal(t, loadTableStat.RowsUpdated, int64(14)) - records := testhelper.RetrieveRecordsFromWarehouse(t, az.DB.DB, + records := whth.RetrieveRecordsFromWarehouse(t, az.DB.DB, fmt.Sprintf(` SELECT id, @@ -466,13 +445,13 @@ func TestIntegration(t *testing.T) { tableName, ), ) - require.Equal(t, records, testhelper.DedupTestRecords()) + require.Equal(t, records, whth.DedupTestRecords()) }) }) t.Run("load file does not exists", func(t *testing.T) { tableName := "load_file_not_exists_test_table" - loadFiles := []warehouseutils.LoadFile{{ + loadFiles := []whutils.LoadFile{{ Location: "http://localhost:1234/testbucket/rudder-warehouse-load-objects/load_file_not_exists_test_table/test_source_id/f31af97e-03e8-46d0-8a1a-1786cb85b22c-load_file_not_exists_test_table/load.csv.gz", }} mockUploader := newMockUploader(t, loadFiles, tableName, schemaInUpload, schemaInWarehouse) @@ -494,9 +473,9 @@ func TestIntegration(t *testing.T) { t.Run("mismatch in number of columns", func(t *testing.T) { tableName := "mismatch_columns_test_table" - uploadOutput := testhelper.UploadLoadFile(t, fm, "../testdata/mismatch-columns.csv.gz", tableName) + uploadOutput := whth.UploadLoadFile(t, fm, "../testdata/mismatch-columns.csv.gz", tableName) - loadFiles := []warehouseutils.LoadFile{{Location: uploadOutput.Location}} + loadFiles := []whutils.LoadFile{{Location: uploadOutput.Location}} mockUploader := newMockUploader(t, loadFiles, tableName, schemaInUpload, schemaInWarehouse) az := azuresynapse.New(config.New(), logger.NOP, stats.NOP) @@ -516,9 +495,9 @@ func TestIntegration(t *testing.T) { t.Run("mismatch in schema", func(t *testing.T) { tableName := "mismatch_schema_test_table" - uploadOutput := testhelper.UploadLoadFile(t, fm, "../testdata/mismatch-schema.csv.gz", tableName) + uploadOutput := whth.UploadLoadFile(t, fm, "../testdata/mismatch-schema.csv.gz", tableName) - loadFiles := []warehouseutils.LoadFile{{Location: uploadOutput.Location}} + loadFiles := []whutils.LoadFile{{Location: uploadOutput.Location}} mockUploader := newMockUploader(t, loadFiles, tableName, schemaInUpload, schemaInWarehouse) az := azuresynapse.New(config.New(), logger.NOP, stats.NOP) @@ -536,7 +515,7 @@ func TestIntegration(t *testing.T) { require.Equal(t, loadTableStat.RowsInserted, int64(14)) require.Equal(t, loadTableStat.RowsUpdated, int64(0)) - records := testhelper.RetrieveRecordsFromWarehouse(t, az.DB.DB, + records := 
whth.RetrieveRecordsFromWarehouse(t, az.DB.DB, fmt.Sprintf(` SELECT id, @@ -555,15 +534,15 @@ func TestIntegration(t *testing.T) { tableName, ), ) - require.Equal(t, records, testhelper.MismatchSchemaTestRecords()) + require.Equal(t, records, whth.MismatchSchemaTestRecords()) }) t.Run("discards", func(t *testing.T) { - tableName := warehouseutils.DiscardsTable + tableName := whutils.DiscardsTable - uploadOutput := testhelper.UploadLoadFile(t, fm, "../testdata/discards.csv.gz", tableName) + uploadOutput := whth.UploadLoadFile(t, fm, "../testdata/discards.csv.gz", tableName) - loadFiles := []warehouseutils.LoadFile{{Location: uploadOutput.Location}} - mockUploader := newMockUploader(t, loadFiles, tableName, warehouseutils.DiscardsSchema, warehouseutils.DiscardsSchema) + loadFiles := []whutils.LoadFile{{Location: uploadOutput.Location}} + mockUploader := newMockUploader(t, loadFiles, tableName, whutils.DiscardsSchema, whutils.DiscardsSchema) az := azuresynapse.New(config.New(), logger.NOP, stats.NOP) err := az.Setup(ctx, warehouse, mockUploader) @@ -572,7 +551,7 @@ func TestIntegration(t *testing.T) { err = az.CreateSchema(ctx) require.NoError(t, err) - err = az.CreateTable(ctx, tableName, warehouseutils.DiscardsSchema) + err = az.CreateTable(ctx, tableName, whutils.DiscardsSchema) require.NoError(t, err) loadTableStat, err := az.LoadTable(ctx, tableName) @@ -580,7 +559,7 @@ func TestIntegration(t *testing.T) { require.Equal(t, loadTableStat.RowsInserted, int64(6)) require.Equal(t, loadTableStat.RowsUpdated, int64(0)) - records := testhelper.RetrieveRecordsFromWarehouse(t, az.DB.DB, + records := whth.RetrieveRecordsFromWarehouse(t, az.DB.DB, fmt.Sprintf(` SELECT column_name, @@ -597,17 +576,26 @@ func TestIntegration(t *testing.T) { tableName, ), ) - require.Equal(t, records, testhelper.DiscardTestRecords()) + require.Equal(t, records, whth.DiscardTestRecords()) }) }) t.Run("CrashRecovery", func(t *testing.T) { + c := testcompose.New(t, compose.FilePaths([]string{"testdata/docker-compose.yml", "../testdata/docker-compose.minio.yml"})) + c.Start(context.Background()) + + azureSynapsePort := c.Port("azure_synapse", 1433) + minioEndpoint := fmt.Sprintf("localhost:%d", c.Port("minio", 9000)) + + ctx := context.Background() + namespace := whth.RandSchema(destType) + warehouse := model.Warehouse{ Source: backendconfig.SourceT{ - ID: sourceID, + ID: "test_source_id", }, Destination: backendconfig.DestinationT{ - ID: destinationID, + ID: "test_destination_id", DestinationDefinition: backendconfig.DestinationDefinitionT{ Name: destType, }, @@ -618,8 +606,8 @@ func TestIntegration(t *testing.T) { "password": password, "port": strconv.Itoa(azureSynapsePort), "sslMode": "disable", - "namespace": "", - "bucketProvider": "MINIO", + "namespace": namespace, + "bucketProvider": whutils.MINIO, "bucketName": bucketName, "accessKeyID": accessKeyID, "secretAccessKey": secretAccessKey, @@ -629,20 +617,23 @@ func TestIntegration(t *testing.T) { "useRudderStorage": false, }, }, - WorkspaceID: workspaceID, + WorkspaceID: "test_workspace_id", Namespace: namespace, } tableName := "crash_recovery_test_table" - mockUploader := newMockUploader(t, []warehouseutils.LoadFile{}, tableName, warehouseutils.DiscardsSchema, warehouseutils.DiscardsSchema) + mockUploader := newMockUploader(t, []whutils.LoadFile{}, tableName, whutils.DiscardsSchema, whutils.DiscardsSchema) t.Run("successful cleanup", func(t *testing.T) { az := azuresynapse.New(config.New(), logger.NOP, stats.NOP) err := az.Setup(ctx, warehouse, mockUploader) 
require.NoError(t, err) - stagingTable := warehouseutils.StagingTablePrefix(warehouseutils.AzureSynapse) + tableName + stagingTable := whutils.StagingTablePrefix(destType) + tableName + + _, err = az.DB.ExecContext(ctx, fmt.Sprintf("IF NOT EXISTS (SELECT * FROM sys.schemas WHERE name = '%[1]s') BEGIN EXEC('CREATE SCHEMA %[1]s') END;", namespace)) + require.NoError(t, err) _, err = az.DB.ExecContext(ctx, fmt.Sprintf("CREATE TABLE %q.%q (id int)", namespace, stagingTable)) require.NoError(t, err) @@ -670,7 +661,6 @@ func TestIntegration(t *testing.T) { sql.Named("table", stagingTable), ).Scan(&count) require.NoError(t, err) - require.Equal(t, 0, count, "staging table should be dropped") }) @@ -681,7 +671,9 @@ func TestIntegration(t *testing.T) { db, dbMock, err := sqlmock.New() require.NoError(t, err) - defer db.Close() + defer func() { + _ = db.Close() + }() dbMock.ExpectQuery("select table_name").WillReturnError(fmt.Errorf("query error")) @@ -786,11 +778,11 @@ func TestAzureSynapse_ProcessColumnValue(t *testing.T) { func newMockUploader( t testing.TB, - loadFiles []warehouseutils.LoadFile, + loadFiles []whutils.LoadFile, tableName string, schemaInUpload model.TableSchema, schemaInWarehouse model.TableSchema, -) warehouseutils.Uploader { +) whutils.Uploader { ctrl := gomock.NewController(t) t.Cleanup(ctrl.Finish) diff --git a/warehouse/integrations/azure-synapse/testdata/template.json b/warehouse/integrations/azure-synapse/testdata/template.json deleted file mode 100644 index 3db509d7e38..00000000000 --- a/warehouse/integrations/azure-synapse/testdata/template.json +++ /dev/null @@ -1,132 +0,0 @@ -{ - "enableMetrics": false, - "workspaceId": "{{.workspaceID}}", - "sources": [ - { - "config": { - "eventUpload": false, - "eventUploadTS": 1637229453729 - }, - "id": "{{.sourceID}}", - "name": "azure-synapse-integration", - "writeKey": "{{.writeKey}}", - "enabled": true, - "sourceDefinitionId": "1TW3fuvuaZqJs877OEailT17KzZ", - "createdBy": "1wLg8l6vAj2TuUUMIIBKL4nsVOT", - "workspaceId": "{{.workspaceID}}", - "deleted": false, - "createdAt": "2021-08-08T14:49:21.580Z", - "updatedAt": "2021-11-18T09:57:33.742Z", - "destinations": [ - { - "config": { - "host": "{{.host}}", - "database": "{{.database}}", - "user": "{{.user}}", - "password": "{{.password}}", - "port": "{{.port}}", - "sslMode": "disable", - "namespace": "{{.namespace}}", - "bucketProvider": "MINIO", - "bucketName": "{{.bucketName}}", - "accessKeyID": "{{.accessKeyID}}", - "secretAccessKey": "{{.secretAccessKey}}", - "useSSL": false, - "endPoint": "{{.endPoint}}", - "syncFrequency": "30", - "useRudderStorage": false - }, - "secretConfig": {}, - "id": "{{.destinationID}}", - "name": "azure-synapse-demo", - "enabled": true, - "workspaceId": "{{.workspaceID}}", - "deleted": false, - "createdAt": "2021-11-21T18:58:44.286Z", - "updatedAt": "2021-11-21T18:58:44.286Z", - "revisionId": "{{.destinationID}}", - "transformations": [], - "destinationDefinition": { - "config": { - "destConfig": { - "defaultConfig": [ - "host", - "database", - "user", - "password", - "port", - "sslMode", - "namespace", - "bucketProvider", - "bucketName", - "accessKeyID", - "accessKey", - "accountName", - "accountKey", - "credentials", - "secretAccessKey", - "useSSL", - "containerName", - "endPoint", - "syncFrequency", - "syncStartAt", - "excludeWindow", - "useRudderStorage" - ] - }, - "secretKeys": [ - "password", - "accessKeyID", - "accessKey", - "accountKey", - "secretAccessKey", - "credentials" - ], - "excludeKeys": [], - "includeKeys": [], - "transformAt": 
"processor", - "transformAtV1": "processor", - "supportedSourceTypes": [ - "android", - "ios", - "web", - "unity", - "amp", - "cloud", - "reactnative", - "cloudSource", - "flutter", - "cordova" - ], - "saveDestinationResponse": true - }, - "responseRules": {}, - "id": "1qvbUYC2xVQ7lvI9UUYkkM4IBt9", - "name": "AZURE_SYNAPSE", - "displayName": "Microsoft SQL Server", - "category": "warehouse", - "createdAt": "2021-04-09T10:10:26.589Z", - "updatedAt": "2021-11-11T07:55:15.622Z" - }, - "isConnectionEnabled": true, - "isProcessorEnabled": true - } - ], - "sourceDefinition": { - "options": null, - "id": "1TW3fuvuaZqJs877OEailT17KzZ", - "name": "Javascript", - "displayName": "Javascript", - "category": null, - "createdAt": "2019-11-12T12:35:30.464Z", - "updatedAt": "2021-09-28T02:27:30.373Z" - }, - "dgSourceTrackingPlanConfig": null - } - ], - "libraries": [ - { - "versionId": "23Uxw7QEiOg8e0KkQV8LmNfWaWh" - } - ] -} diff --git a/warehouse/integrations/clickhouse/clickhouse_test.go b/warehouse/integrations/clickhouse/clickhouse_test.go index e2178d1971a..5645107054c 100644 --- a/warehouse/integrations/clickhouse/clickhouse_test.go +++ b/warehouse/integrations/clickhouse/clickhouse_test.go @@ -12,6 +12,8 @@ import ( "testing" "time" + "github.com/samber/lo" + "github.com/rudderlabs/rudder-go-kit/stats" "go.uber.org/mock/gomock" @@ -28,16 +30,14 @@ import ( kithelper "github.com/rudderlabs/rudder-go-kit/testhelper" backendconfig "github.com/rudderlabs/rudder-server/backend-config" - "github.com/rudderlabs/rudder-server/runner" - "github.com/rudderlabs/rudder-server/testhelper/health" - "github.com/rudderlabs/rudder-server/testhelper/workspaceConfig" + "github.com/rudderlabs/rudder-server/testhelper/backendconfigtest" "github.com/rudderlabs/rudder-server/utils/misc" "github.com/rudderlabs/rudder-server/warehouse/client" "github.com/rudderlabs/rudder-server/warehouse/integrations/clickhouse" - "github.com/rudderlabs/rudder-server/warehouse/integrations/testhelper" + whth "github.com/rudderlabs/rudder-server/warehouse/integrations/testhelper" mockuploader "github.com/rudderlabs/rudder-server/warehouse/internal/mocks/utils" "github.com/rudderlabs/rudder-server/warehouse/internal/model" - warehouseutils "github.com/rudderlabs/rudder-server/warehouse/utils" + whutils "github.com/rudderlabs/rudder-server/warehouse/utils" "github.com/rudderlabs/rudder-server/warehouse/validations" ) @@ -46,143 +46,72 @@ func TestIntegration(t *testing.T) { t.Skip("Skipping tests. 
Add 'SLOW=1' env var to run test.") } - c := testcompose.New(t, compose.FilePaths([]string{"testdata/docker-compose.clickhouse.yml", "testdata/docker-compose.clickhouse-cluster.yml", "../testdata/docker-compose.jobsdb.yml", "../testdata/docker-compose.minio.yml"})) - c.Start(context.Background()) - misc.Init() validations.Init() - warehouseutils.Init() - - jobsDBPort := c.Port("jobsDb", 5432) - minioPort := c.Port("minio", 9000) - port := c.Port("clickhouse", 9000) - clusterPort1 := c.Port("clickhouse01", 9000) - clusterPort2 := c.Port("clickhouse02", 9000) - clusterPort3 := c.Port("clickhouse03", 9000) - clusterPort4 := c.Port("clickhouse04", 9000) - - httpPort, err := kithelper.GetFreePort() - require.NoError(t, err) - - workspaceID := warehouseutils.RandHex() - sourceID := warehouseutils.RandHex() - destinationID := warehouseutils.RandHex() - writeKey := warehouseutils.RandHex() - clusterSourceID := warehouseutils.RandHex() - clusterDestinationID := warehouseutils.RandHex() - clusterWriteKey := warehouseutils.RandHex() + whutils.Init() - destType := warehouseutils.CLICKHOUSE + destType := whutils.CLICKHOUSE host := "localhost" database := "rudderdb" user := "rudder" password := "rudder-password" cluster := "rudder_cluster" - bucketName := "testbucket" accessKeyID := "MYACCESSKEY" secretAccessKey := "MYSECRETKEY" - minioEndpoint := fmt.Sprintf("localhost:%d", minioPort) - - templateConfigurations := map[string]any{ - "workspaceID": workspaceID, - "sourceID": sourceID, - "destinationID": destinationID, - "clusterSourceID": clusterSourceID, - "clusterDestinationID": clusterDestinationID, - "writeKey": writeKey, - "clusterWriteKey": clusterWriteKey, - "host": host, - "database": database, - "user": user, - "password": password, - "port": strconv.Itoa(port), - "cluster": cluster, - "clusterHost": host, - "clusterDatabase": database, - "clusterCluster": cluster, - "clusterUser": user, - "clusterPassword": password, - "clusterPort": strconv.Itoa(clusterPort1), - "bucketName": bucketName, - "accessKeyID": accessKeyID, - "secretAccessKey": secretAccessKey, - "endPoint": minioEndpoint, - } - workspaceConfigPath := workspaceConfig.CreateTempFile(t, "testdata/template.json", templateConfigurations) - - testhelper.EnhanceWithDefaultEnvs(t) - t.Setenv("JOBS_DB_PORT", strconv.Itoa(jobsDBPort)) - t.Setenv("WAREHOUSE_JOBS_DB_PORT", strconv.Itoa(jobsDBPort)) - t.Setenv("MINIO_ACCESS_KEY_ID", accessKeyID) - t.Setenv("MINIO_SECRET_ACCESS_KEY", secretAccessKey) - t.Setenv("MINIO_MINIO_ENDPOINT", minioEndpoint) - t.Setenv("MINIO_SSL", "false") - t.Setenv("RSERVER_WAREHOUSE_CLICKHOUSE_MAX_PARALLEL_LOADS", "8") - t.Setenv("RSERVER_WAREHOUSE_WEB_PORT", strconv.Itoa(httpPort)) - t.Setenv("RSERVER_BACKEND_CONFIG_CONFIG_JSONPATH", workspaceConfigPath) - t.Setenv("RSERVER_WAREHOUSE_CLICKHOUSE_SLOW_QUERY_THRESHOLD", "0s") - - svcDone := make(chan struct{}) - - ctx, cancel := context.WithCancel(context.Background()) - defer cancel() - - go func() { - r := runner.New(runner.ReleaseInfo{}) - _ = r.Run(ctx, []string{"clickhouse-integration-test"}) - - close(svcDone) - }() - t.Cleanup(func() { <-svcDone }) - - serviceHealthEndpoint := fmt.Sprintf("http://localhost:%d/health", httpPort) - health.WaitUntilReady(ctx, t, serviceHealthEndpoint, time.Minute, time.Second, "serviceHealthEndpoint") - t.Run("Events flow", func(t *testing.T) { - var dbs []*sql.DB - - for _, port := range []int{port, clusterPort1, clusterPort2, clusterPort3, clusterPort4} { - dsn := 
fmt.Sprintf("tcp://%s:%d?compress=false&database=%s&password=%s&secure=false&skip_verify=true&username=%s", - "localhost", port, "rudderdb", "rudder-password", "rudder", - ) + httpPort, err := kithelper.GetFreePort() + require.NoError(t, err) - db := connectClickhouseDB(ctx, t, dsn) - dbs = append(dbs, db) - } + c := testcompose.New(t, compose.FilePaths([]string{"testdata/docker-compose.clickhouse.yml", "testdata/docker-compose.clickhouse-cluster.yml", "../testdata/docker-compose.jobsdb.yml", "../testdata/docker-compose.minio.yml"})) + c.Start(context.Background()) - jobsDB := testhelper.JobsDB(t, jobsDBPort) + workspaceID := whutils.RandHex() + jobsDBPort := c.Port("jobsDb", 5432) + clickhousePort := c.Port("clickhouse", 9000) + clickhouseClusterPort1 := c.Port("clickhouse01", 9000) + clickhouseClusterPort2 := c.Port("clickhouse02", 9000) + clickhouseClusterPort3 := c.Port("clickhouse03", 9000) + clickhouseClusterPort4 := c.Port("clickhouse04", 9000) + minioEndpoint := fmt.Sprintf("localhost:%d", c.Port("minio", 9000)) - tables := []string{"identifies", "users", "tracks", "product_track", "pages", "screens", "aliases", "groups"} + jobsDB := whth.JobsDB(t, jobsDBPort) testCases := []struct { name string - writeKey string - sourceID string - destinationID string - warehouseEvents testhelper.EventsCountMap - warehouseModifiedEvents testhelper.EventsCountMap - clusterSetup func(t *testing.T) - db *sql.DB + warehouseEvents whth.EventsCountMap + warehouseModifiedEvents whth.EventsCountMap + clusterSetup func(*testing.T, context.Context) + setupDB func(testing.TB, context.Context) *sql.DB stagingFilePrefix string + configOverride map[string]any }{ { - name: "Single Setup", - writeKey: writeKey, - sourceID: sourceID, - destinationID: destinationID, - db: dbs[0], + name: "Single Setup", + setupDB: func(t testing.TB, ctx context.Context) *sql.DB { + t.Helper() + dsn := fmt.Sprintf("tcp://%s:%d?compress=false&database=%s&password=%s&secure=false&skip_verify=true&username=%s", + host, clickhousePort, database, password, user, + ) + return connectClickhouseDB(t, ctx, dsn) + }, stagingFilePrefix: "testdata/upload-job", + configOverride: map[string]any{ + "port": strconv.Itoa(clickhousePort), + }, }, { - name: "Cluster Mode Setup", - writeKey: clusterWriteKey, - sourceID: clusterSourceID, - destinationID: clusterDestinationID, - db: dbs[1], - warehouseModifiedEvents: testhelper.EventsCountMap{ + name: "Cluster Mode Setup", + setupDB: func(t testing.TB, ctx context.Context) *sql.DB { + t.Helper() + dsn := fmt.Sprintf("tcp://%s:%d?compress=false&database=%s&password=%s&secure=false&skip_verify=true&username=%s", + host, clickhouseClusterPort1, database, password, user, + ) + return connectClickhouseDB(t, ctx, dsn) + }, + warehouseModifiedEvents: whth.EventsCountMap{ "identifies": 8, "users": 2, "tracks": 8, @@ -192,25 +121,83 @@ func TestIntegration(t *testing.T) { "aliases": 8, "groups": 8, }, - clusterSetup: func(t *testing.T) { + clusterSetup: func(t *testing.T, ctx context.Context) { t.Helper() - initializeClickhouseClusterMode(t, dbs[1:], tables, clusterPort1) + + clusterPorts := []int{clickhouseClusterPort2, clickhouseClusterPort3, clickhouseClusterPort4} + dbs := lo.Map(clusterPorts, func(port, _ int) *sql.DB { + dsn := fmt.Sprintf("tcp://%s:%d?compress=false&database=%s&password=%s&secure=false&skip_verify=true&username=%s", + host, port, database, password, user, + ) + return connectClickhouseDB(t, ctx, dsn) + }) + tables := []string{"identifies", "users", "tracks", "product_track", "pages", 
"screens", "aliases", "groups"} + initializeClickhouseClusterMode(t, dbs, tables, clickhouseClusterPort1) }, stagingFilePrefix: "testdata/upload-cluster-job", + configOverride: map[string]any{ + "cluster": cluster, + "port": strconv.Itoa(clickhouseClusterPort1), + }, }, } for _, tc := range testCases { - tc := tc - t.Run(tc.name, func(t *testing.T) { + var ( + sourceID = whutils.RandHex() + destinationID = whutils.RandHex() + writeKey = whutils.RandHex() + ) + + destinationBuilder := backendconfigtest.NewDestinationBuilder(destType). + WithID(destinationID). + WithRevisionID(destinationID). + WithConfigOption("host", host). + WithConfigOption("database", database). + WithConfigOption("user", user). + WithConfigOption("password", password). + WithConfigOption("bucketProvider", whutils.MINIO). + WithConfigOption("bucketName", bucketName). + WithConfigOption("accessKeyID", accessKeyID). + WithConfigOption("secretAccessKey", secretAccessKey). + WithConfigOption("useSSL", false). + WithConfigOption("secure", false). + WithConfigOption("endPoint", minioEndpoint). + WithConfigOption("useRudderStorage", false). + WithConfigOption("syncFrequency", "30") + for k, v := range tc.configOverride { + destinationBuilder = destinationBuilder.WithConfigOption(k, v) + } + + workspaceConfig := backendconfigtest.NewConfigBuilder(). + WithSource( + backendconfigtest.NewSourceBuilder(). + WithID(sourceID). + WithWriteKey(writeKey). + WithWorkspaceID(workspaceID). + WithConnection(destinationBuilder.Build()). + Build(), + ). + WithWorkspaceID(workspaceID). + Build() + + t.Setenv("RSERVER_WAREHOUSE_CLICKHOUSE_MAX_PARALLEL_LOADS", "8") + t.Setenv("RSERVER_WAREHOUSE_CLICKHOUSE_SLOW_QUERY_THRESHOLD", "0s") + + whth.BootstrapSvc(t, workspaceConfig, httpPort, jobsDBPort) + + db := tc.setupDB(t, context.Background()) + t.Cleanup(func() { _ = db.Close() }) + tables := []string{"identifies", "users", "tracks", "product_track", "pages", "screens", "aliases", "groups"} + sqlClient := &client.Client{ - SQL: tc.db, + SQL: db, Type: client.SQLClient, } conf := map[string]interface{}{ - "bucketProvider": "MINIO", + "bucketProvider": whutils.MINIO, "bucketName": bucketName, "accessKeyID": accessKeyID, "secretAccessKey": secretAccessKey, @@ -220,12 +207,12 @@ func TestIntegration(t *testing.T) { } t.Log("verifying test case 1") - ts1 := testhelper.TestConfig{ - WriteKey: tc.writeKey, + ts1 := whth.TestConfig{ + WriteKey: writeKey, Schema: database, Tables: tables, - SourceID: tc.sourceID, - DestinationID: tc.destinationID, + SourceID: sourceID, + DestinationID: destinationID, WarehouseEventsMap: tc.warehouseEvents, Config: conf, WorkspaceID: workspaceID, @@ -233,23 +220,23 @@ func TestIntegration(t *testing.T) { JobsDB: jobsDB, HTTPPort: httpPort, Client: sqlClient, - UserID: testhelper.GetUserId(destType), + UserID: whth.GetUserId(destType), StagingFilePath: tc.stagingFilePrefix + ".staging-1.json", } ts1.VerifyEvents(t) t.Log("setting up cluster") if tc.clusterSetup != nil { - tc.clusterSetup(t) + tc.clusterSetup(t, context.Background()) } t.Log("verifying test case 2") - ts2 := testhelper.TestConfig{ - WriteKey: tc.writeKey, + ts2 := whth.TestConfig{ + WriteKey: writeKey, Schema: database, Tables: tables, - SourceID: tc.sourceID, - DestinationID: tc.destinationID, + SourceID: sourceID, + DestinationID: destinationID, WarehouseEventsMap: tc.warehouseModifiedEvents, Config: conf, WorkspaceID: workspaceID, @@ -257,7 +244,7 @@ func TestIntegration(t *testing.T) { JobsDB: jobsDB, HTTPPort: httpPort, Client: sqlClient, - UserID: 
testhelper.GetUserId(destType), + UserID: whth.GetUserId(destType), StagingFilePath: tc.stagingFilePrefix + ".staging-2.json", } ts2.VerifyEvents(t) @@ -266,6 +253,12 @@ func TestIntegration(t *testing.T) { }) t.Run("Validations", func(t *testing.T) { + c := testcompose.New(t, compose.FilePaths([]string{"testdata/docker-compose.clickhouse.yml", "../testdata/docker-compose.minio.yml"})) + c.Start(context.Background()) + + clickhousePort := c.Port("clickhouse", 9000) + minioEndpoint := fmt.Sprintf("localhost:%d", c.Port("minio", 9000)) + dest := backendconfig.DestinationT{ ID: "21Ev6TI6emCFDKph2Zn6XfTP7PI", Config: map[string]any{ @@ -274,10 +267,9 @@ func TestIntegration(t *testing.T) { "cluster": "", "user": user, "password": password, - "port": strconv.Itoa(port), + "port": strconv.Itoa(clickhousePort), "secure": false, - "namespace": "", - "bucketProvider": "MINIO", + "bucketProvider": whutils.MINIO, "bucketName": bucketName, "accessKeyID": accessKeyID, "secretAccessKey": secretAccessKey, @@ -287,7 +279,7 @@ func TestIntegration(t *testing.T) { "useRudderStorage": false, }, DestinationDefinition: backendconfig.DestinationDefinitionT{ - ID: destinationID, + ID: "test_destination_id", Name: "CLICKHOUSE", DisplayName: "ClickHouse", }, @@ -295,232 +287,49 @@ func TestIntegration(t *testing.T) { Enabled: true, RevisionID: "29eeuTnqbBKn0XVTj5z9XQIbaru", } - testhelper.VerifyConfigurationTest(t, dest) + whth.VerifyConfigurationTest(t, dest) }) -} -func TestClickhouse_UseS3CopyEngineForLoading(t *testing.T) { - S3EngineEnabledWorkspaceIDs := []string{"BpLnfgDsc2WD8F2qNfHK5a84jjJ"} - - testCases := []struct { - name string - ObjectStorage string - workspaceID string - useS3Engine bool - }{ - { - name: "incompatible object storage(AZURE BLOB)", - ObjectStorage: warehouseutils.AzureBlob, - workspaceID: "test-workspace-id", - }, - { - name: "incompatible object storage(GCS)", - ObjectStorage: warehouseutils.GCS, - workspaceID: "test-workspace-id", - }, - { - name: "incompatible workspace", - ObjectStorage: warehouseutils.S3, - workspaceID: "test-workspace-id", - }, - { - name: "compatible workspace with incompatible object storage", - ObjectStorage: warehouseutils.GCS, - workspaceID: "BpLnfgDsc2WD8F2qNfHK5a84jjJ", - }, - { - name: "compatible workspace(S3)", - ObjectStorage: warehouseutils.S3, - workspaceID: "BpLnfgDsc2WD8F2qNfHK5a84jjJ", - useS3Engine: true, - }, - { - name: "compatible workspace(MINIO)", - ObjectStorage: warehouseutils.MINIO, - workspaceID: "BpLnfgDsc2WD8F2qNfHK5a84jjJ", - useS3Engine: true, - }, - } + t.Run("Fetch schema", func(t *testing.T) { + c := testcompose.New(t, compose.FilePaths([]string{"testdata/docker-compose.clickhouse.yml"})) + c.Start(context.Background()) - for _, tc := range testCases { - t.Run(tc.name, func(t *testing.T) { - c := config.New() - c.Set("Warehouse.clickhouse.s3EngineEnabledWorkspaceIDs", S3EngineEnabledWorkspaceIDs) + workspaceID := whutils.RandHex() + clickhousePort := c.Port("clickhouse", 9000) - ch := clickhouse.New(c, logger.NOP, stats.NOP) - ch.Warehouse = model.Warehouse{ - WorkspaceID: tc.workspaceID, - } - ch.ObjectStorage = tc.ObjectStorage + ctx := context.Background() + namespace := "test_namespace" + table := "test_table" - require.Equal(t, tc.useS3Engine, ch.UseS3CopyEngineForLoading()) - }) - } -} - -func TestClickhouse_LoadTableRoundTrip(t *testing.T) { - c := testcompose.New(t, compose.FilePaths([]string{ - "testdata/docker-compose.clickhouse.yml", - "../testdata/docker-compose.minio.yml", - })) - c.Start(context.Background()) - - 
misc.Init() - warehouseutils.Init() - - minioPort := c.Port("minio", 9000) - clickhousePort := c.Port("clickhouse", 9000) - - bucketName := "testbucket" - accessKeyID := "MYACCESSKEY" - secretAccessKey := "MYSECRETKEY" - region := "us-east-1" - databaseName := "rudderdb" - password := "rudder-password" - user := "rudder" - table := "test_table" - workspaceID := "test_workspace_id" - provider := "MINIO" - - minioEndpoint := fmt.Sprintf("localhost:%d", minioPort) - - dsn := fmt.Sprintf("tcp://%s:%d?compress=false&database=%s&password=%s&secure=false&skip_verify=true&username=%s", - "localhost", clickhousePort, databaseName, password, user, - ) - - db := connectClickhouseDB(context.Background(), t, dsn) - defer func() { _ = db.Close() }() - - testCases := []struct { - name string - fileName string - S3EngineEnabledWorkspaceIDs []string - disableNullable bool - }{ - { - name: "normal loading using downloading of load files", - fileName: "testdata/load.csv.gz", - }, - { - name: "using s3 engine for loading", - S3EngineEnabledWorkspaceIDs: []string{workspaceID}, - fileName: "testdata/load-copy.csv.gz", - }, - { - name: "normal loading using downloading of load files with disable nullable", - fileName: "testdata/load.csv.gz", - disableNullable: true, - }, - { - name: "using s3 engine for loading with disable nullable", - S3EngineEnabledWorkspaceIDs: []string{workspaceID}, - fileName: "testdata/load-copy.csv.gz", - disableNullable: true, - }, - } - - for i, tc := range testCases { - i := i - tc := tc - - t.Run(tc.name, func(t *testing.T) { - c := config.New() - c.Set("Warehouse.clickhouse.s3EngineEnabledWorkspaceIDs", tc.S3EngineEnabledWorkspaceIDs) - c.Set("Warehouse.clickhouse.disableNullable", tc.disableNullable) + dsn := fmt.Sprintf("tcp://%s:%d?compress=false&database=%s&password=%s&secure=false&skip_verify=true&username=%s", + host, clickhousePort, database, password, user, + ) + db := connectClickhouseDB(t, ctx, dsn) + defer func() { _ = db.Close() }() - ch := clickhouse.New(c, logger.NOP, stats.NOP) + t.Run("Success", func(t *testing.T) { + ch := clickhouse.New(config.New(), logger.NOP, stats.NOP) warehouse := model.Warehouse{ - Namespace: fmt.Sprintf("test_namespace_%d", i), + Namespace: fmt.Sprintf("%s_success", namespace), WorkspaceID: workspaceID, Destination: backendconfig.DestinationT{ Config: map[string]any{ - "bucketProvider": provider, - "host": "localhost", - "port": strconv.Itoa(clickhousePort), - "database": databaseName, - "user": user, - "password": password, - "bucketName": bucketName, - "accessKeyID": accessKeyID, - "secretAccessKey": secretAccessKey, - "endPoint": minioEndpoint, + "host": host, + "port": strconv.Itoa(clickhousePort), + "database": database, + "user": user, + "password": password, }, }, } - t.Log("Preparing load files metadata") - f, err := os.Open(tc.fileName) - require.NoError(t, err) - - defer func() { _ = f.Close() }() - - fm, err := filemanager.New(&filemanager.Settings{ - Provider: provider, - Config: map[string]any{ - "bucketName": bucketName, - "accessKeyID": accessKeyID, - "secretAccessKey": secretAccessKey, - "endPoint": minioEndpoint, - "forcePathStyle": true, - "disableSSL": true, - "region": region, - "enableSSE": false, - }, - }) - require.NoError(t, err) - - ctx := context.Background() - uploadOutput, err := fm.Upload(ctx, f, fmt.Sprintf("test_prefix_%d", i)) - require.NoError(t, err) - - mockUploader := newMockUploader(t, - strconv.Itoa(minioPort), - model.TableSchema{ - "alter_test_bool": "boolean", - "alter_test_datetime": "datetime", - 
"alter_test_float": "float", - "alter_test_int": "int", - "alter_test_string": "string", - "id": "string", - "received_at": "datetime", - "test_array_bool": "array(boolean)", - "test_array_datetime": "array(datetime)", - "test_array_float": "array(float)", - "test_array_int": "array(int)", - "test_array_string": "array(string)", - "test_bool": "boolean", - "test_datetime": "datetime", - "test_float": "float", - "test_int": "int", - "test_string": "string", - }, - []warehouseutils.LoadFile{{Location: uploadOutput.Location}}, - ) - - t.Log("Setting up clickhouse") - err = ch.Setup(ctx, warehouse, mockUploader) - require.NoError(t, err) - - t.Log("Verifying connection") - _, err = ch.Connect(ctx, warehouse) - require.NoError(t, err) - - t.Log("Verifying empty schema") - schema, unrecognizedSchema, err := ch.FetchSchema(ctx) - require.NoError(t, err) - require.Empty(t, schema) - require.Empty(t, unrecognizedSchema) - - t.Log("Creating schema") - err = ch.CreateSchema(ctx) + err := ch.Setup(ctx, warehouse, newMockUploader(t, "", nil, nil)) require.NoError(t, err) - t.Log("Creating schema twice should not fail") err = ch.CreateSchema(ctx) require.NoError(t, err) - t.Log("Creating table") err = ch.CreateTable(ctx, table, model.TableSchema{ "id": "string", "test_int": "int", @@ -537,163 +346,77 @@ func TestClickhouse_LoadTableRoundTrip(t *testing.T) { }) require.NoError(t, err) - t.Log("Adding columns") - err = ch.AddColumns(ctx, table, []warehouseutils.ColumnInfo{ - {Name: "alter_test_int", Type: "int"}, - {Name: "alter_test_float", Type: "float"}, - {Name: "alter_test_bool", Type: "boolean"}, - {Name: "alter_test_string", Type: "string"}, - {Name: "alter_test_datetime", Type: "datetime"}, - }) - require.NoError(t, err) - - t.Log("Verifying schema") - schema, unrecognizedSchema, err = ch.FetchSchema(ctx) + schema, unrecognizedSchema, err := ch.FetchSchema(ctx) require.NoError(t, err) require.NotEmpty(t, schema) require.Empty(t, unrecognizedSchema) + }) - t.Log("verifying if columns are not like Nullable(T) if disableNullable set to true") - if tc.disableNullable { - rows, err := ch.DB.Query(fmt.Sprintf(`select table, name, type from system.columns where database = '%s'`, warehouse.Namespace)) - require.NoError(t, err) - - defer func() { _ = rows.Close() }() - - var ( - tableName string - columnName string - columnType string - ) - - for rows.Next() { - err = rows.Scan(&tableName, &columnName, &columnType) - require.NoError(t, err) + t.Run("Invalid host", func(t *testing.T) { + ch := clickhouse.New(config.New(), logger.NOP, stats.NOP) - if strings.Contains(columnType, "Nullable") { - require.Fail(t, fmt.Sprintf("table %s column %s is of Nullable type even when disableNullable is set to true", tableName, columnName)) - } - } - require.NoError(t, rows.Err()) + warehouse := model.Warehouse{ + Namespace: fmt.Sprintf("%s_invalid_host", namespace), + WorkspaceID: workspaceID, + Destination: backendconfig.DestinationT{ + Config: map[string]any{ + "host": "clickhouse", + "port": strconv.Itoa(clickhousePort), + "database": database, + "user": user, + "password": password, + }, + }, } - t.Log("Loading data into table") - _, err = ch.LoadTable(ctx, table) + err := ch.Setup(ctx, warehouse, newMockUploader(t, "", nil, nil)) require.NoError(t, err) - t.Log("Drop table") - err = ch.DropTable(ctx, table) - require.NoError(t, err) + schema, unrecognizedSchema, err := ch.FetchSchema(ctx) + require.ErrorContains(t, err, errors.New("dial tcp: lookup clickhouse").Error()) + require.Empty(t, schema) + require.Empty(t, 
unrecognizedSchema) + }) - t.Log("Creating users identifies and table") - for _, tableName := range []string{warehouseutils.IdentifiesTable, warehouseutils.UsersTable} { - err = ch.CreateTable(ctx, tableName, model.TableSchema{ - "id": "string", - "user_id": "string", - "test_int": "int", - "test_float": "float", - "test_bool": "boolean", - "test_string": "string", - "test_datetime": "datetime", - "received_at": "datetime", - }) - require.NoError(t, err) - } + t.Run("Invalid database", func(t *testing.T) { + ch := clickhouse.New(config.New(), logger.NOP, stats.NOP) - t.Log("Drop users identifies and table") - for _, tableName := range []string{warehouseutils.IdentifiesTable, warehouseutils.UsersTable} { - err = ch.DropTable(ctx, tableName) - require.NoError(t, err) + warehouse := model.Warehouse{ + Namespace: fmt.Sprintf("%s_invalid_database", namespace), + WorkspaceID: workspaceID, + Destination: backendconfig.DestinationT{ + Config: map[string]any{ + "host": host, + "port": strconv.Itoa(clickhousePort), + "database": "invalid_database", + "user": user, + "password": password, + }, + }, } - t.Log("Verifying empty schema") - schema, unrecognizedSchema, err = ch.FetchSchema(ctx) + err := ch.Setup(ctx, warehouse, newMockUploader(t, "", nil, nil)) + require.NoError(t, err) + + schema, unrecognizedSchema, err := ch.FetchSchema(ctx) require.NoError(t, err) require.Empty(t, schema) require.Empty(t, unrecognizedSchema) }) - } -} - -func TestClickhouse_TestConnection(t *testing.T) { - c := testcompose.New(t, compose.FilePaths([]string{"testdata/docker-compose.clickhouse.yml"})) - c.Start(context.Background()) - - misc.Init() - warehouseutils.Init() - - clickhousePort := c.Port("clickhouse", 9000) - - databaseName := "rudderdb" - password := "rudder-password" - user := "rudder" - workspaceID := "test_workspace_id" - namespace := "test_namespace" - provider := "MINIO" - timeout := 5 * time.Second - - dsn := fmt.Sprintf("tcp://%s:%d?compress=false&database=%s&password=%s&secure=false&skip_verify=true&username=%s", - "localhost", clickhousePort, databaseName, password, user, - ) - - db := connectClickhouseDB(context.Background(), t, dsn) - defer func() { _ = db.Close() }() - - ctx := context.Background() - - testCases := []struct { - name string - host string - tlConfig string - timeout time.Duration - wantError error - }{ - { - name: "DeadlineExceeded", - wantError: errors.New("connection timeout: context deadline exceeded"), - }, - { - name: "Success", - timeout: timeout, - }, - { - name: "TLS config", - timeout: timeout, - tlConfig: "test-tls-config", - }, - { - name: "No such host", - timeout: timeout, - wantError: errors.New(`dial tcp: lookup clickhouse`), - host: "clickhouse", - }, - } - - for i, tc := range testCases { - i := i - tc := tc - t.Run(tc.name, func(t *testing.T) { + t.Run("Empty schema", func(t *testing.T) { ch := clickhouse.New(config.New(), logger.NOP, stats.NOP) - host := "localhost" - if tc.host != "" { - host = tc.host - } - warehouse := model.Warehouse{ - Namespace: namespace, + Namespace: fmt.Sprintf("%s_empty_schema", namespace), WorkspaceID: workspaceID, Destination: backendconfig.DestinationT{ - ID: fmt.Sprintf("test-destination-%d", i), Config: map[string]any{ - "bucketProvider": provider, - "host": host, - "port": strconv.Itoa(clickhousePort), - "database": databaseName, - "user": user, - "password": password, - "caCertificate": tc.tlConfig, + "host": host, + "port": strconv.Itoa(clickhousePort), + "database": database, + "user": user, + "password": password, }, }, } @@ 
-701,318 +424,476 @@ func TestClickhouse_TestConnection(t *testing.T) { err := ch.Setup(ctx, warehouse, newMockUploader(t, "", nil, nil)) require.NoError(t, err) - ch.SetConnectionTimeout(tc.timeout) - - ctx, cancel := context.WithTimeout(context.Background(), tc.timeout) - defer cancel() + err = ch.CreateSchema(ctx) + require.NoError(t, err) - err = ch.TestConnection(ctx, warehouse) - if tc.wantError != nil { - require.ErrorContains(t, err, tc.wantError.Error()) - return - } + schema, unrecognizedSchema, err := ch.FetchSchema(ctx) require.NoError(t, err) + require.Empty(t, schema) + require.Empty(t, unrecognizedSchema) }) - } -} - -func TestClickhouse_LoadTestTable(t *testing.T) { - c := testcompose.New(t, compose.FilePaths([]string{"testdata/docker-compose.clickhouse.yml"})) - c.Start(context.Background()) - - misc.Init() - warehouseutils.Init() - - clickhousePort := c.Port("clickhouse", 9000) - - databaseName := "rudderdb" - password := "rudder-password" - user := "rudder" - workspaceID := "test_workspace_id" - namespace := "test_namespace" - provider := "MINIO" - host := "localhost" - tableName := warehouseutils.CTStagingTablePrefix + "_test_table" - testColumns := model.TableSchema{ - "id": "int", - "val": "string", - } - testPayload := map[string]any{ - "id": 1, - "val": "RudderStack", - } - dsn := fmt.Sprintf("tcp://%s:%d?compress=false&database=%s&password=%s&secure=false&skip_verify=true&username=%s", - "localhost", clickhousePort, databaseName, password, user, - ) - - db := connectClickhouseDB(context.Background(), t, dsn) - defer func() { _ = db.Close() }() - - testCases := []struct { - name string - wantError error - payload map[string]any - }{ - { - name: "Success", - }, - { - name: "Invalid columns", - payload: map[string]any{ - "invalid_val": "Invalid Data", - }, - wantError: errors.New("code: 16, message: No such column invalid_val in table test_namespace.setup_test_staging"), - }, - } - - ctx := context.Background() - - for i, tc := range testCases { - tc := tc - i := i - - t.Run(tc.name, func(t *testing.T) { + t.Run("Unrecognized schema", func(t *testing.T) { ch := clickhouse.New(config.New(), logger.NOP, stats.NOP) warehouse := model.Warehouse{ - Namespace: namespace, + Namespace: fmt.Sprintf("%s_unrecognized_schema", namespace), WorkspaceID: workspaceID, Destination: backendconfig.DestinationT{ Config: map[string]any{ - "bucketProvider": provider, - "host": host, - "port": strconv.Itoa(clickhousePort), - "database": databaseName, - "user": user, - "password": password, + "host": host, + "port": strconv.Itoa(clickhousePort), + "database": database, + "user": user, + "password": password, }, }, } - payload := make(map[string]any) - for k, v := range tc.payload { - payload[k] = v - } - for k, v := range testPayload { - payload[k] = v - } - err := ch.Setup(ctx, warehouse, newMockUploader(t, "", nil, nil)) require.NoError(t, err) err = ch.CreateSchema(ctx) require.NoError(t, err) - tableName := fmt.Sprintf("%s_%d", tableName, i) - - err = ch.CreateTable(ctx, tableName, testColumns) + _, err = ch.DB.Exec(fmt.Sprintf("CREATE TABLE IF NOT EXISTS %s.%s (x Enum('hello' = 1, 'world' = 2)) ENGINE = TinyLog;", + warehouse.Namespace, + table, + )) require.NoError(t, err) - err = ch.LoadTestTable(ctx, "", tableName, payload, "") - if tc.wantError != nil { - require.ErrorContains(t, err, tc.wantError.Error()) - return - } + schema, unrecognizedSchema, err := ch.FetchSchema(ctx) require.NoError(t, err) + require.NotEmpty(t, schema) + require.NotEmpty(t, unrecognizedSchema) + + 
require.Equal(t, unrecognizedSchema, model.Schema{ + table: { + "x": "", + }, + }) }) - } -} + }) -func TestClickhouse_FetchSchema(t *testing.T) { - c := testcompose.New(t, compose.FilePaths([]string{"testdata/docker-compose.clickhouse.yml"})) - c.Start(context.Background()) + t.Run("Load Table round trip", func(t *testing.T) { + c := testcompose.New(t, compose.FilePaths([]string{"testdata/docker-compose.clickhouse.yml", "../testdata/docker-compose.minio.yml"})) + c.Start(context.Background()) - misc.Init() - warehouseutils.Init() + workspaceID := whutils.RandHex() + minioPort := c.Port("minio", 9000) + clickhousePort := c.Port("clickhouse", 9000) + minioEndpoint := fmt.Sprintf("localhost:%d", minioPort) - clickhousePort := c.Port("clickhouse", 9000) + region := "us-east-1" + table := "test_table" - databaseName := "rudderdb" - password := "rudder-password" - user := "rudder" - workspaceID := "test_workspace_id" - namespace := "test_namespace" - table := "test_table" - - dsn := fmt.Sprintf("tcp://%s:%d?compress=false&database=%s&password=%s&secure=false&skip_verify=true&username=%s", - "localhost", clickhousePort, databaseName, password, user, - ) - - db := connectClickhouseDB(context.Background(), t, dsn) - defer func() { _ = db.Close() }() - - ctx := context.Background() - - t.Run("Success", func(t *testing.T) { - ch := clickhouse.New(config.New(), logger.NOP, stats.NOP) - - warehouse := model.Warehouse{ - Namespace: fmt.Sprintf("%s_success", namespace), - WorkspaceID: workspaceID, - Destination: backendconfig.DestinationT{ - Config: map[string]any{ - "host": "localhost", - "port": strconv.Itoa(clickhousePort), - "database": databaseName, - "user": user, - "password": password, - }, + dsn := fmt.Sprintf("tcp://%s:%d?compress=false&database=%s&password=%s&secure=false&skip_verify=true&username=%s", + host, clickhousePort, database, password, user, + ) + db := connectClickhouseDB(t, context.Background(), dsn) + defer func() { _ = db.Close() }() + + testCases := []struct { + name string + fileName string + S3EngineEnabledWorkspaceIDs []string + disableNullable bool + }{ + { + name: "normal loading using downloading of load files", + fileName: "testdata/load.csv.gz", + }, + { + name: "using s3 engine for loading", + S3EngineEnabledWorkspaceIDs: []string{workspaceID}, + fileName: "testdata/load-copy.csv.gz", + }, + { + name: "normal loading using downloading of load files with disable nullable", + fileName: "testdata/load.csv.gz", + disableNullable: true, + }, + { + name: "using s3 engine for loading with disable nullable", + S3EngineEnabledWorkspaceIDs: []string{workspaceID}, + fileName: "testdata/load-copy.csv.gz", + disableNullable: true, }, } - err := ch.Setup(ctx, warehouse, newMockUploader(t, "", nil, nil)) - require.NoError(t, err) + for i, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + conf := config.New() + conf.Set("Warehouse.clickhouse.s3EngineEnabledWorkspaceIDs", tc.S3EngineEnabledWorkspaceIDs) + conf.Set("Warehouse.clickhouse.disableNullable", tc.disableNullable) + + ch := clickhouse.New(conf, logger.NOP, stats.NOP) + + warehouse := model.Warehouse{ + Namespace: fmt.Sprintf("test_namespace_%d", i), + WorkspaceID: workspaceID, + Destination: backendconfig.DestinationT{ + Config: map[string]any{ + "bucketProvider": whutils.MINIO, + "host": host, + "port": strconv.Itoa(clickhousePort), + "database": database, + "user": user, + "password": password, + "bucketName": bucketName, + "accessKeyID": accessKeyID, + "secretAccessKey": secretAccessKey, + "endPoint": 
minioEndpoint, + }, + }, + } - err = ch.CreateSchema(ctx) - require.NoError(t, err) + t.Log("Preparing load files metadata") + f, err := os.Open(tc.fileName) + require.NoError(t, err) + defer func() { _ = f.Close() }() - err = ch.CreateTable(ctx, table, model.TableSchema{ - "id": "string", - "test_int": "int", - "test_float": "float", - "test_bool": "boolean", - "test_string": "string", - "test_datetime": "datetime", - "received_at": "datetime", - "test_array_bool": "array(boolean)", - "test_array_datetime": "array(datetime)", - "test_array_float": "array(float)", - "test_array_int": "array(int)", - "test_array_string": "array(string)", - }) - require.NoError(t, err) + fm, err := filemanager.New(&filemanager.Settings{ + Provider: whutils.MINIO, + Config: map[string]any{ + "bucketName": bucketName, + "accessKeyID": accessKeyID, + "secretAccessKey": secretAccessKey, + "endPoint": minioEndpoint, + "forcePathStyle": true, + "disableSSL": true, + "region": region, + "enableSSE": false, + }, + }) + require.NoError(t, err) - schema, unrecognizedSchema, err := ch.FetchSchema(ctx) - require.NoError(t, err) - require.NotEmpty(t, schema) - require.Empty(t, unrecognizedSchema) - }) + ctx := context.Background() + uploadOutput, err := fm.Upload(ctx, f, fmt.Sprintf("test_prefix_%d", i)) + require.NoError(t, err) - t.Run("Invalid host", func(t *testing.T) { - ch := clickhouse.New(config.New(), logger.NOP, stats.NOP) - - warehouse := model.Warehouse{ - Namespace: fmt.Sprintf("%s_invalid_host", namespace), - WorkspaceID: workspaceID, - Destination: backendconfig.DestinationT{ - Config: map[string]any{ - "host": "clickhouse", - "port": strconv.Itoa(clickhousePort), - "database": databaseName, - "user": user, - "password": password, - }, - }, - } + mockUploader := newMockUploader(t, + strconv.Itoa(minioPort), + model.TableSchema{ + "alter_test_bool": "boolean", + "alter_test_datetime": "datetime", + "alter_test_float": "float", + "alter_test_int": "int", + "alter_test_string": "string", + "id": "string", + "received_at": "datetime", + "test_array_bool": "array(boolean)", + "test_array_datetime": "array(datetime)", + "test_array_float": "array(float)", + "test_array_int": "array(int)", + "test_array_string": "array(string)", + "test_bool": "boolean", + "test_datetime": "datetime", + "test_float": "float", + "test_int": "int", + "test_string": "string", + }, + []whutils.LoadFile{{Location: uploadOutput.Location}}, + ) - err := ch.Setup(ctx, warehouse, newMockUploader(t, "", nil, nil)) - require.NoError(t, err) + t.Log("Setting up clickhouse") + err = ch.Setup(ctx, warehouse, mockUploader) + require.NoError(t, err) - schema, unrecognizedSchema, err := ch.FetchSchema(ctx) - require.ErrorContains(t, err, errors.New("dial tcp: lookup clickhouse").Error()) - require.Empty(t, schema) - require.Empty(t, unrecognizedSchema) - }) + t.Log("Verifying connection") + _, err = ch.Connect(ctx, warehouse) + require.NoError(t, err) - t.Run("Invalid database", func(t *testing.T) { - ch := clickhouse.New(config.New(), logger.NOP, stats.NOP) - - warehouse := model.Warehouse{ - Namespace: fmt.Sprintf("%s_invalid_database", namespace), - WorkspaceID: workspaceID, - Destination: backendconfig.DestinationT{ - Config: map[string]any{ - "host": "localhost", - "port": strconv.Itoa(clickhousePort), - "database": "invalid_database", - "user": user, - "password": password, - }, - }, - } + t.Log("Verifying empty schema") + schema, unrecognizedSchema, err := ch.FetchSchema(ctx) + require.NoError(t, err) + require.Empty(t, schema) + 
require.Empty(t, unrecognizedSchema) - err := ch.Setup(ctx, warehouse, newMockUploader(t, "", nil, nil)) - require.NoError(t, err) + t.Log("Creating schema") + err = ch.CreateSchema(ctx) + require.NoError(t, err) - schema, unrecognizedSchema, err := ch.FetchSchema(ctx) - require.NoError(t, err) - require.Empty(t, schema) - require.Empty(t, unrecognizedSchema) + t.Log("Creating schema twice should not fail") + err = ch.CreateSchema(ctx) + require.NoError(t, err) + + t.Log("Creating table") + err = ch.CreateTable(ctx, table, model.TableSchema{ + "id": "string", + "test_int": "int", + "test_float": "float", + "test_bool": "boolean", + "test_string": "string", + "test_datetime": "datetime", + "received_at": "datetime", + "test_array_bool": "array(boolean)", + "test_array_datetime": "array(datetime)", + "test_array_float": "array(float)", + "test_array_int": "array(int)", + "test_array_string": "array(string)", + }) + require.NoError(t, err) + + t.Log("Adding columns") + err = ch.AddColumns(ctx, table, []whutils.ColumnInfo{ + {Name: "alter_test_int", Type: "int"}, + {Name: "alter_test_float", Type: "float"}, + {Name: "alter_test_bool", Type: "boolean"}, + {Name: "alter_test_string", Type: "string"}, + {Name: "alter_test_datetime", Type: "datetime"}, + }) + require.NoError(t, err) + + t.Log("Verifying schema") + schema, unrecognizedSchema, err = ch.FetchSchema(ctx) + require.NoError(t, err) + require.NotEmpty(t, schema) + require.Empty(t, unrecognizedSchema) + + t.Log("verifying if columns are not like Nullable(T) if disableNullable set to true") + if tc.disableNullable { + rows, err := ch.DB.Query(fmt.Sprintf(`select table, name, type from system.columns where database = '%s'`, warehouse.Namespace)) + require.NoError(t, err) + + defer func() { _ = rows.Close() }() + + var ( + tableName string + columnName string + columnType string + ) + + for rows.Next() { + err = rows.Scan(&tableName, &columnName, &columnType) + require.NoError(t, err) + + if strings.Contains(columnType, "Nullable") { + require.Fail(t, fmt.Sprintf("table %s column %s is of Nullable type even when disableNullable is set to true", tableName, columnName)) + } + } + require.NoError(t, rows.Err()) + } + + t.Log("Loading data into table") + _, err = ch.LoadTable(ctx, table) + require.NoError(t, err) + + t.Log("Drop table") + err = ch.DropTable(ctx, table) + require.NoError(t, err) + + t.Log("Creating users identifies and table") + for _, tableName := range []string{whutils.IdentifiesTable, whutils.UsersTable} { + err = ch.CreateTable(ctx, tableName, model.TableSchema{ + "id": "string", + "user_id": "string", + "test_int": "int", + "test_float": "float", + "test_bool": "boolean", + "test_string": "string", + "test_datetime": "datetime", + "received_at": "datetime", + }) + require.NoError(t, err) + } + + t.Log("Drop users identifies and table") + for _, tableName := range []string{whutils.IdentifiesTable, whutils.UsersTable} { + err = ch.DropTable(ctx, tableName) + require.NoError(t, err) + } + + t.Log("Verifying empty schema") + schema, unrecognizedSchema, err = ch.FetchSchema(ctx) + require.NoError(t, err) + require.Empty(t, schema) + require.Empty(t, unrecognizedSchema) + }) + } }) - t.Run("Empty schema", func(t *testing.T) { - ch := clickhouse.New(config.New(), logger.NOP, stats.NOP) - - warehouse := model.Warehouse{ - Namespace: fmt.Sprintf("%s_empty_schema", namespace), - WorkspaceID: workspaceID, - Destination: backendconfig.DestinationT{ - Config: map[string]any{ - "host": "localhost", - "port": strconv.Itoa(clickhousePort), 
- "database": databaseName, - "user": user, - "password": password, - }, + t.Run("Test connection", func(t *testing.T) { + c := testcompose.New(t, compose.FilePaths([]string{"testdata/docker-compose.clickhouse.yml"})) + c.Start(context.Background()) + + workspaceID := whutils.RandHex() + clickhousePort := c.Port("clickhouse", 9000) + + ctx := context.Background() + namespace := "test_namespace" + timeout := 5 * time.Second + + dsn := fmt.Sprintf("tcp://%s:%d?compress=false&database=%s&password=%s&secure=false&skip_verify=true&username=%s", + host, clickhousePort, database, password, user, + ) + db := connectClickhouseDB(t, context.Background(), dsn) + defer func() { _ = db.Close() }() + + testCases := []struct { + name string + host string + tlConfig string + timeout time.Duration + wantError error + }{ + { + name: "DeadlineExceeded", + wantError: errors.New("connection timeout: context deadline exceeded"), + }, + { + name: "Success", + timeout: timeout, + }, + { + name: "TLS config", + timeout: timeout, + tlConfig: "test-tls-config", + }, + { + name: "No such host", + timeout: timeout, + wantError: errors.New(`dial tcp: lookup clickhouse`), + host: "clickhouse", }, } - err := ch.Setup(ctx, warehouse, newMockUploader(t, "", nil, nil)) - require.NoError(t, err) + for i, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + ch := clickhouse.New(config.New(), logger.NOP, stats.NOP) - err = ch.CreateSchema(ctx) - require.NoError(t, err) + host := host + if tc.host != "" { + host = tc.host + } - schema, unrecognizedSchema, err := ch.FetchSchema(ctx) - require.NoError(t, err) - require.Empty(t, schema) - require.Empty(t, unrecognizedSchema) + warehouse := model.Warehouse{ + Namespace: namespace, + WorkspaceID: workspaceID, + Destination: backendconfig.DestinationT{ + ID: fmt.Sprintf("test-destination-%d", i), + Config: map[string]any{ + "bucketProvider": whutils.MINIO, + "host": host, + "port": strconv.Itoa(clickhousePort), + "database": database, + "user": user, + "password": password, + "caCertificate": tc.tlConfig, + }, + }, + } + + err := ch.Setup(ctx, warehouse, newMockUploader(t, "", nil, nil)) + require.NoError(t, err) + + ch.SetConnectionTimeout(tc.timeout) + + ctx, cancel := context.WithTimeout(context.Background(), tc.timeout) + defer cancel() + + err = ch.TestConnection(ctx, warehouse) + if tc.wantError != nil { + require.ErrorContains(t, err, tc.wantError.Error()) + return + } + require.NoError(t, err) + }) + } }) - t.Run("Unrecognized schema", func(t *testing.T) { - ch := clickhouse.New(config.New(), logger.NOP, stats.NOP) - - warehouse := model.Warehouse{ - Namespace: fmt.Sprintf("%s_unrecognized_schema", namespace), - WorkspaceID: workspaceID, - Destination: backendconfig.DestinationT{ - Config: map[string]any{ - "host": "localhost", - "port": strconv.Itoa(clickhousePort), - "database": databaseName, - "user": user, - "password": password, + t.Run("Load test table", func(t *testing.T) { + c := testcompose.New(t, compose.FilePaths([]string{"testdata/docker-compose.clickhouse.yml"})) + c.Start(context.Background()) + + workspaceID := whutils.RandHex() + clickhousePort := c.Port("clickhouse", 9000) + + ctx := context.Background() + namespace := "test_namespace" + tableName := whutils.CTStagingTablePrefix + "_test_table" + testColumns := model.TableSchema{ + "id": "int", + "val": "string", + } + testPayload := map[string]any{ + "id": 1, + "val": "RudderStack", + } + + dsn := 
fmt.Sprintf("tcp://%s:%d?compress=false&database=%s&password=%s&secure=false&skip_verify=true&username=%s", + host, clickhousePort, database, password, user, + ) + db := connectClickhouseDB(t, context.Background(), dsn) + defer func() { _ = db.Close() }() + + testCases := []struct { + name string + wantError error + payload map[string]any + }{ + { + name: "Success", + }, + { + name: "Invalid columns", + payload: map[string]any{ + "invalid_val": "Invalid Data", }, + wantError: errors.New("code: 16, message: No such column invalid_val in table test_namespace.setup_test_staging"), }, } - err := ch.Setup(ctx, warehouse, newMockUploader(t, "", nil, nil)) - require.NoError(t, err) + for i, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + ch := clickhouse.New(config.New(), logger.NOP, stats.NOP) + + warehouse := model.Warehouse{ + Namespace: namespace, + WorkspaceID: workspaceID, + Destination: backendconfig.DestinationT{ + Config: map[string]any{ + "bucketProvider": whutils.MINIO, + "host": host, + "port": strconv.Itoa(clickhousePort), + "database": database, + "user": user, + "password": password, + }, + }, + } - err = ch.CreateSchema(ctx) - require.NoError(t, err) + payload := make(map[string]any) + for k, v := range tc.payload { + payload[k] = v + } + for k, v := range testPayload { + payload[k] = v + } - _, err = ch.DB.Exec(fmt.Sprintf("CREATE TABLE IF NOT EXISTS %s.%s (x Enum('hello' = 1, 'world' = 2)) ENGINE = TinyLog;", - warehouse.Namespace, - table, - )) - require.NoError(t, err) + err := ch.Setup(ctx, warehouse, newMockUploader(t, "", nil, nil)) + require.NoError(t, err) - schema, unrecognizedSchema, err := ch.FetchSchema(ctx) - require.NoError(t, err) - require.NotEmpty(t, schema) - require.NotEmpty(t, unrecognizedSchema) + err = ch.CreateSchema(ctx) + require.NoError(t, err) - require.Equal(t, unrecognizedSchema, model.Schema{ - table: { - "x": "", - }, - }) + tableName := fmt.Sprintf("%s_%d", tableName, i) + + err = ch.CreateTable(ctx, tableName, testColumns) + require.NoError(t, err) + + err = ch.LoadTestTable(ctx, "", tableName, payload, "") + if tc.wantError != nil { + require.ErrorContains(t, err, tc.wantError.Error()) + return + } + require.NoError(t, err) + }) + } }) } -func connectClickhouseDB(ctx context.Context, t testing.TB, dsn string) *sql.DB { +func connectClickhouseDB(t testing.TB, ctx context.Context, dsn string) *sql.DB { t.Helper() db, err := sql.Open("clickhouse", dsn) @@ -1022,108 +903,108 @@ func connectClickhouseDB(ctx context.Context, t testing.TB, dsn string) *sql.DB defer cancel() require.Eventually(t, func() bool { - err := db.PingContext(ctx) - t.Log(err) - return err == nil + if err := db.PingContext(ctx); err != nil { + t.Log("Ping failed:", err) + return false + } + return true }, time.Minute, time.Second) - err = db.PingContext(ctx) - require.NoError(t, err) - + require.NoError(t, db.PingContext(ctx)) return db } func initializeClickhouseClusterMode(t *testing.T, clusterDBs []*sql.DB, tables []string, clusterPost int) { t.Helper() - type ColumnInfoT struct { - ColumnName string - ColumnType string + type columnInfo struct { + columnName string + columnType string } - tableColumnInfoMap := map[string][]ColumnInfoT{ + tableColumnInfoMap := map[string][]columnInfo{ "identifies": { { - ColumnName: "context_passed_ip", - ColumnType: "Nullable(String)", + columnName: "context_passed_ip", + columnType: "Nullable(String)", }, { - ColumnName: "context_library_name", - ColumnType: "Nullable(String)", + columnName: "context_library_name", + 
columnType: "Nullable(String)", }, }, "product_track": { { - ColumnName: "revenue", - ColumnType: "Nullable(Float64)", + columnName: "revenue", + columnType: "Nullable(Float64)", }, { - ColumnName: "context_passed_ip", - ColumnType: "Nullable(String)", + columnName: "context_passed_ip", + columnType: "Nullable(String)", }, { - ColumnName: "context_library_name", - ColumnType: "Nullable(String)", + columnName: "context_library_name", + columnType: "Nullable(String)", }, }, "tracks": { { - ColumnName: "context_passed_ip", - ColumnType: "Nullable(String)", + columnName: "context_passed_ip", + columnType: "Nullable(String)", }, { - ColumnName: "context_library_name", - ColumnType: "Nullable(String)", + columnName: "context_library_name", + columnType: "Nullable(String)", }, }, "users": { { - ColumnName: "context_passed_ip", - ColumnType: "Nullable(String)", + columnName: "context_passed_ip", + columnType: "Nullable(String)", }, { - ColumnName: "context_library_name", - ColumnType: "SimpleAggregateFunction(anyLast, Nullable(String))", + columnName: "context_library_name", + columnType: "SimpleAggregateFunction(anyLast, Nullable(String))", }, }, "pages": { { - ColumnName: "context_passed_ip", - ColumnType: "Nullable(String)", + columnName: "context_passed_ip", + columnType: "Nullable(String)", }, { - ColumnName: "context_library_name", - ColumnType: "Nullable(String)", + columnName: "context_library_name", + columnType: "Nullable(String)", }, }, "screens": { { - ColumnName: "context_passed_ip", - ColumnType: "Nullable(String)", + columnName: "context_passed_ip", + columnType: "Nullable(String)", }, { - ColumnName: "context_library_name", - ColumnType: "Nullable(String)", + columnName: "context_library_name", + columnType: "Nullable(String)", }, }, "aliases": { { - ColumnName: "context_passed_ip", - ColumnType: "Nullable(String)", + columnName: "context_passed_ip", + columnType: "Nullable(String)", }, { - ColumnName: "context_library_name", - ColumnType: "Nullable(String)", + columnName: "context_library_name", + columnType: "Nullable(String)", }, }, "groups": { { - ColumnName: "context_passed_ip", - ColumnType: "Nullable(String)", + columnName: "context_passed_ip", + columnType: "Nullable(String)", }, { - ColumnName: "context_library_name", - ColumnType: "Nullable(String)", + columnName: "context_library_name", + columnType: "Nullable(String)", }, }, } @@ -1135,7 +1016,7 @@ func initializeClickhouseClusterMode(t *testing.T, clusterDBs []*sql.DB, tables sqlStatement := fmt.Sprintf("RENAME TABLE %[1]s to %[1]s_shard ON CLUSTER rudder_cluster;", table) log.Printf("Renaming tables to sharded tables for distribution view for clickhouse cluster with sqlStatement: %s", sqlStatement) - require.NoError(t, testhelper.WithConstantRetries(func() error { + require.NoError(t, whth.WithConstantRetries(func() error { _, err := clusterDB.Exec(sqlStatement) return err })) @@ -1161,14 +1042,14 @@ func initializeClickhouseClusterMode(t *testing.T, clusterDBs []*sql.DB, tables ) log.Printf("Creating distribution view for clickhouse cluster with sqlStatement: %s", sqlStatement) - require.NoError(t, testhelper.WithConstantRetries(func() error { + require.NoError(t, whth.WithConstantRetries(func() error { _, err := clusterDB.Exec(sqlStatement) return err })) } t.Run("Create Drop Create", func(t *testing.T) { - clusterDB := connectClickhouseDB(context.Background(), t, fmt.Sprintf("tcp://%s:%d?compress=false&database=%s&password=%s&secure=false&skip_verify=true&username=%s", + clusterDB := connectClickhouseDB(t, 
context.Background(), fmt.Sprintf("tcp://%s:%d?compress=false&database=%s&password=%s&secure=false&skip_verify=true&username=%s", "localhost", clusterPost, "rudderdb", "rudder-password", "rudder", )) @@ -1190,15 +1071,15 @@ func initializeClickhouseClusterMode(t *testing.T, clusterDBs []*sql.DB, tables ) } - require.NoError(t, testhelper.WithConstantRetries(func() error { + require.NoError(t, whth.WithConstantRetries(func() error { _, err := clusterDB.Exec(createTableSQLStatement()) return err })) - require.NoError(t, testhelper.WithConstantRetries(func() error { + require.NoError(t, whth.WithConstantRetries(func() error { _, err := clusterDB.Exec(fmt.Sprintf(`DROP TABLE rudderdb.%[1]s ON CLUSTER "rudder_cluster";`, testTable)) return err })) - require.NoError(t, testhelper.WithConstantRetries(func() error { + require.NoError(t, whth.WithConstantRetries(func() error { _, err := clusterDB.Exec(createTableSQLStatement()) return err })) @@ -1220,16 +1101,16 @@ func initializeClickhouseClusterMode(t *testing.T, clusterDBs []*sql.DB, tables ) } - require.NoError(t, testhelper.WithConstantRetries(func() error { + require.NoError(t, whth.WithConstantRetries(func() error { _, err := clusterDB.Exec(createTableSQLStatement()) return err })) - require.NoError(t, testhelper.WithConstantRetries(func() error { + require.NoError(t, whth.WithConstantRetries(func() error { _, err := clusterDB.Exec(fmt.Sprintf(`DROP TABLE rudderdb.%[1]s ON CLUSTER "rudder_cluster";`, testTable)) return err })) - err := testhelper.WithConstantRetries(func() error { + err := whth.WithConstantRetries(func() error { _, err := clusterDB.Exec(createTableSQLStatement()) return err }) @@ -1250,14 +1131,14 @@ func initializeClickhouseClusterMode(t *testing.T, clusterDBs []*sql.DB, tables for _, columnInfo := range columnInfos { sqlStatement += fmt.Sprintf(` ADD COLUMN IF NOT EXISTS %[1]s %[2]s,`, - columnInfo.ColumnName, - columnInfo.ColumnType, + columnInfo.columnName, + columnInfo.columnType, ) } sqlStatement = strings.TrimSuffix(sqlStatement, ",") log.Printf("Altering columns for distribution view for clickhouse cluster with sqlStatement: %s", sqlStatement) - require.NoError(t, testhelper.WithConstantRetries(func() error { + require.NoError(t, whth.WithConstantRetries(func() error { _, err := clusterDB.Exec(sqlStatement) return err })) @@ -1269,7 +1150,7 @@ func newMockUploader( t testing.TB, minioPort string, tableSchema model.TableSchema, - metadata []warehouseutils.LoadFile, + metadata []whutils.LoadFile, ) *mockuploader.MockUploader { var sampleLocation string if len(metadata) > 0 { diff --git a/warehouse/integrations/clickhouse/testdata/template.json b/warehouse/integrations/clickhouse/testdata/template.json deleted file mode 100644 index 4eaac923033..00000000000 --- a/warehouse/integrations/clickhouse/testdata/template.json +++ /dev/null @@ -1,261 +0,0 @@ -{ - "enableMetrics": false, - "workspaceId": "{{.workspaceID}}", - "sources": [ - { - "config": { - "eventUpload": false, - "eventUploadTS": 1637229453729 - }, - "id": "{{.sourceID}}", - "name": "clickhouse-integration", - "writeKey": "{{.writeKey}}", - "enabled": true, - "sourceDefinitionId": "1TW3fuvuaZqJs877OEailT17KzZ", - "createdBy": "1wLg8l6vAj2TuUUMIIBKL4nsVOT", - "workspaceId": "{{.workspaceID}}", - "deleted": false, - "createdAt": "2021-08-08T14:49:21.580Z", - "updatedAt": "2021-11-18T09:57:33.742Z", - "destinations": [ - { - "config": { - "host": "{{.host}}", - "database": "{{.database}}", - "cluster": "", - "user": "{{.user}}", - "password": "{{.password}}", - 
"port": "{{.port}}", - "secure": false, - "bucketProvider": "MINIO", - "bucketName": "{{.bucketName}}", - "accessKeyID": "{{.accessKeyID}}", - "secretAccessKey": "{{.secretAccessKey}}", - "useSSL": false, - "endPoint": "{{.endPoint}}", - "syncFrequency": "30", - "useRudderStorage": false - }, - "secretConfig": {}, - "id": "{{.destinationID}}", - "name": "clickhouse-demo", - "enabled": true, - "workspaceId": "{{.workspaceID}}", - "deleted": false, - "createdAt": "2021-11-21T18:21:25.599Z", - "updatedAt": "2021-11-21T18:21:25.599Z", - "revisionId": "{{.destinationID}}", - "transformations": [], - "destinationDefinition": { - "config": { - "destConfig": { - "defaultConfig": [ - "host", - "database", - "cluster", - "user", - "password", - "port", - "bucketName", - "bucketProvider", - "containerName", - "accessKeyID", - "accessKey", - "accountKey", - "accountName", - "credentials", - "secretAccessKey", - "useSSL", - "endPoint", - "prefix", - "syncFrequency", - "syncStartAt", - "enableSSE", - "secure", - "skipVerify", - "caCertificate", - "excludeWindow", - "useRudderStorage" - ] - }, - "secretKeys": [ - "password", - "accessKeyID", - "accessKey", - "accountKey", - "secretAccessKey", - "credentials" - ], - "excludeKeys": [], - "includeKeys": [], - "transformAt": "processor", - "transformAtV1": "processor", - "supportedSourceTypes": [ - "android", - "ios", - "web", - "unity", - "amp", - "cloud", - "reactnative", - "cloudSource", - "flutter", - "cordova" - ], - "saveDestinationResponse": true - }, - "responseRules": null, - "id": "1eBvkIRSwc2ESGMK9dj6OXq2G12", - "name": "CLICKHOUSE", - "displayName": "ClickHouse", - "category": "warehouse", - "createdAt": "2020-07-03T20:25:41.439Z", - "updatedAt": "2021-11-11T07:55:35.140Z" - }, - "isConnectionEnabled": true, - "isProcessorEnabled": true - } - ], - "sourceDefinition": { - "options": null, - "id": "1TW3fuvuaZqJs877OEailT17KzZ", - "name": "Javascript", - "displayName": "Javascript", - "category": null, - "createdAt": "2019-11-12T12:35:30.464Z", - "updatedAt": "2021-09-28T02:27:30.373Z" - }, - "dgSourceTrackingPlanConfig": null - }, - { - "config": { - "eventUpload": false, - "eventUploadTS": 1637229453729 - }, - "id": "{{.clusterSourceID}}", - "name": "clickhouse-cluster-integration", - "writeKey": "{{.clusterWriteKey}}", - "enabled": true, - "sourceDefinitionId": "1TW3fuvuaZqJs877OEailT17KzZ", - "createdBy": "1wLg8l6vAj2TuUUMIIBKL4nsVOT", - "workspaceId": "{{.workspaceID}}", - "deleted": false, - "createdAt": "2021-08-08T14:49:21.580Z", - "updatedAt": "2021-11-18T09:57:33.742Z", - "destinations": [ - { - "config": { - "host": "{{.clusterHost}}", - "database": "{{.clusterDatabase}}", - "cluster": "{{.cluster}}", - "user": "{{.clusterUser}}", - "password": "{{.clusterPassword}}", - "port": "{{.clusterPort}}", - "secure": false, - "bucketProvider": "MINIO", - "bucketName": "{{.bucketName}}", - "accessKeyID": "{{.accessKeyID}}", - "secretAccessKey": "{{.secretAccessKey}}", - "useSSL": false, - "endPoint": "{{.endPoint}}", - "syncFrequency": "30", - "useRudderStorage": false - }, - "secretConfig": {}, - "id": "{{.clusterDestinationID}}", - "name": "clickhouse-demo", - "enabled": true, - "workspaceId": "{{.workspaceID}}", - "deleted": false, - "createdAt": "2021-11-21T18:21:25.599Z", - "updatedAt": "2021-11-21T18:21:25.599Z", - "revisionId": "{{.clusterDestinationID}}", - "transformations": [], - "destinationDefinition": { - "config": { - "destConfig": { - "defaultConfig": [ - "host", - "database", - "cluster", - "user", - "password", - "port", - 
"bucketName", - "bucketProvider", - "containerName", - "accessKeyID", - "accessKey", - "accountKey", - "accountName", - "credentials", - "secretAccessKey", - "useSSL", - "endPoint", - "prefix", - "syncFrequency", - "syncStartAt", - "enableSSE", - "secure", - "skipVerify", - "caCertificate", - "excludeWindow", - "useRudderStorage" - ] - }, - "secretKeys": [ - "password", - "accessKeyID", - "accessKey", - "accountKey", - "secretAccessKey", - "credentials" - ], - "excludeKeys": [], - "includeKeys": [], - "transformAt": "processor", - "transformAtV1": "processor", - "supportedSourceTypes": [ - "android", - "ios", - "web", - "unity", - "amp", - "cloud", - "reactnative", - "cloudSource", - "flutter", - "cordova" - ], - "saveDestinationResponse": true - }, - "responseRules": null, - "id": "1eBvkIRSwc2ESGMK9dj6OXq2G12", - "name": "CLICKHOUSE", - "displayName": "ClickHouse", - "category": "warehouse", - "createdAt": "2020-07-03T20:25:41.439Z", - "updatedAt": "2021-11-11T07:55:35.140Z" - }, - "isConnectionEnabled": true, - "isProcessorEnabled": true - } - ], - "sourceDefinition": { - "options": null, - "id": "1TW3fuvuaZqJs877OEailT17KzZ", - "name": "Javascript", - "displayName": "Javascript", - "category": null, - "createdAt": "2019-11-12T12:35:30.464Z", - "updatedAt": "2021-09-28T02:27:30.373Z" - }, - "dgSourceTrackingPlanConfig": null - } - ], - "libraries": [ - { - "versionId": "23Uxw7QEiOg8e0KkQV8LmNfWaWh" - } - ] -} diff --git a/warehouse/integrations/datalake/datalake_test.go b/warehouse/integrations/datalake/datalake_test.go index bd074e76398..80dbdc2cecb 100644 --- a/warehouse/integrations/datalake/datalake_test.go +++ b/warehouse/integrations/datalake/datalake_test.go @@ -6,7 +6,6 @@ import ( "errors" "fmt" "os" - "strconv" "strings" "testing" "time" @@ -19,7 +18,10 @@ import ( "github.com/rudderlabs/compose-test/compose" - "github.com/rudderlabs/rudder-server/testhelper/workspaceConfig" + backendconfig "github.com/rudderlabs/rudder-server/backend-config" + "github.com/rudderlabs/rudder-server/testhelper/backendconfigtest" + "github.com/rudderlabs/rudder-server/utils/misc" + "github.com/rudderlabs/rudder-server/warehouse/validations" "github.com/minio/minio-go/v7/pkg/credentials" @@ -27,18 +29,11 @@ import ( kithelper "github.com/rudderlabs/rudder-go-kit/testhelper" - "github.com/rudderlabs/rudder-server/runner" - "github.com/rudderlabs/rudder-server/testhelper/health" - "github.com/rudderlabs/rudder-server/warehouse/integrations/testhelper" - - "github.com/rudderlabs/rudder-server/utils/misc" - "github.com/rudderlabs/rudder-server/warehouse/validations" + whth "github.com/rudderlabs/rudder-server/warehouse/integrations/testhelper" "github.com/stretchr/testify/require" - backendconfig "github.com/rudderlabs/rudder-server/backend-config" - - warehouseutils "github.com/rudderlabs/rudder-server/warehouse/utils" + whutils "github.com/rudderlabs/rudder-server/warehouse/utils" _ "github.com/trinodb/trino-go-client/trino" ) @@ -48,129 +43,47 @@ func TestIntegration(t *testing.T) { t.Skip("Skipping tests. 
Add 'SLOW=1' env var to run test.") } - c := testcompose.New(t, compose.FilePaths([]string{ - "testdata/docker-compose.yml", - "testdata/docker-compose.trino.yml", - "testdata/docker-compose.spark.yml", - "../testdata/docker-compose.jobsdb.yml", - "../testdata/docker-compose.minio.yml", - })) - c.Start(context.Background()) - misc.Init() validations.Init() - warehouseutils.Init() - - jobsDBPort := c.Port("jobsDb", 5432) - minioPort := c.Port("minio", 9000) - azurePort := c.Port("azure", 10000) - gcsPort := c.Port("gcs", 4443) - trinoPort := c.Port("trino", 8080) - - httpPort, err := kithelper.GetFreePort() - require.NoError(t, err) - - workspaceID := warehouseutils.RandHex() - azWriteKey := warehouseutils.RandHex() - azDestinationID := warehouseutils.RandHex() - azSourceID := warehouseutils.RandHex() - s3WriteKey := warehouseutils.RandHex() - s3DestinationID := warehouseutils.RandHex() - s3SourceID := warehouseutils.RandHex() - gcsWriteKey := warehouseutils.RandHex() - gcsDestinationID := warehouseutils.RandHex() - gcsSourceID := warehouseutils.RandHex() + whutils.Init() azContainerName := "azure-datalake-test" s3BucketName := "some-bucket" gcsBucketName := "gcs-datalake-test" azAccountName := "MYACCESSKEY" azAccountKey := "TVlTRUNSRVRLRVk=" - azEndPoint := fmt.Sprintf("localhost:%d", azurePort) s3Region := "us-east-1" s3AccessKeyID := "MYACCESSKEY" s3AccessKey := "MYSECRETKEY" - s3EndPoint := fmt.Sprintf("localhost:%d", minioPort) - gcsEndPoint := fmt.Sprintf("http://localhost:%d/storage/v1/", gcsPort) - - accessKeyID := "MYACCESSKEY" - secretAccessKey := "MYSECRETKEY" - - minioEndpoint := fmt.Sprintf("localhost:%d", minioPort) - - templateConfigurations := map[string]any{ - "workspaceID": workspaceID, - "azWriteKey": azWriteKey, - "azDestinationID": azDestinationID, - "azSourceID": azSourceID, - "s3WriteKey": s3WriteKey, - "s3DestinationID": s3DestinationID, - "s3SourceID": s3SourceID, - "gcsWriteKey": gcsWriteKey, - "gcsDestinationID": gcsDestinationID, - "gcsSourceID": gcsSourceID, - "azContainerName": azContainerName, - "azAccountName": azAccountName, - "azAccountKey": azAccountKey, - "azEndpoint": azEndPoint, - "s3BucketName": s3BucketName, - "s3Region": s3Region, - "s3AccessKeyID": s3AccessKeyID, - "s3AccessKey": s3AccessKey, - "s3EndPoint": s3EndPoint, - "gcsBucketName": gcsBucketName, - "gcsEndPoint": gcsEndPoint, - } - - workspaceConfigPath := workspaceConfig.CreateTempFile(t, "testdata/template.json", templateConfigurations) - - testhelper.EnhanceWithDefaultEnvs(t) - t.Setenv("JOBS_DB_PORT", strconv.Itoa(jobsDBPort)) - t.Setenv("WAREHOUSE_JOBS_DB_PORT", strconv.Itoa(jobsDBPort)) - t.Setenv("MINIO_ACCESS_KEY_ID", accessKeyID) - t.Setenv("MINIO_SECRET_ACCESS_KEY", secretAccessKey) - t.Setenv("MINIO_MINIO_ENDPOINT", minioEndpoint) - t.Setenv("MINIO_SSL", "false") - t.Setenv("RSERVER_WAREHOUSE_WEB_PORT", strconv.Itoa(httpPort)) - t.Setenv("RSERVER_BACKEND_CONFIG_CONFIG_JSONPATH", workspaceConfigPath) - t.Setenv("STORAGE_EMULATOR_HOST", fmt.Sprintf("localhost:%d", gcsPort)) - t.Setenv("RSERVER_WORKLOAD_IDENTITY_TYPE", "GKE") - svcDone := make(chan struct{}) - - ctx, cancel := context.WithCancel(context.Background()) - defer cancel() + t.Run("Events flow", func(t *testing.T) { + httpPort, err := kithelper.GetFreePort() + require.NoError(t, err) - go func() { - r := runner.New(runner.ReleaseInfo{}) - _ = r.Run(ctx, []string{"dataLake-integration-test"}) + c := testcompose.New(t, compose.FilePaths([]string{"testdata/docker-compose.azure.yml", "testdata/docker-compose.gcs.yml", 
"../testdata/docker-compose.jobsdb.yml", "../testdata/docker-compose.minio.yml"})) + c.Start(context.Background()) - close(svcDone) - }() - t.Cleanup(func() { <-svcDone }) + workspaceID := whutils.RandHex() + jobsDBPort := c.Port("jobsDb", 5432) + azEndPoint := fmt.Sprintf("localhost:%d", c.Port("azure", 10000)) + s3EndPoint := fmt.Sprintf("localhost:%d", c.Port("minio", 9000)) + gcsEndPoint := fmt.Sprintf("http://localhost:%d/storage/v1/", c.Port("gcs", 4443)) - serviceHealthEndpoint := fmt.Sprintf("http://localhost:%d/health", httpPort) - health.WaitUntilReady(ctx, t, serviceHealthEndpoint, time.Minute, time.Second, "serviceHealthEndpoint") + jobsDB := whth.JobsDB(t, jobsDBPort) - t.Run("Events flow", func(t *testing.T) { testCases := []struct { name string - writeKey string tables []string - sourceID string - destinationID string destType string conf map[string]interface{} - prerequisite func(t testing.TB) + prerequisite func(t testing.TB, ctx context.Context) stagingFilePrefix string + configOverride map[string]any }{ { - name: "S3Datalake", - writeKey: s3WriteKey, - tables: []string{"identifies", "users", "tracks", "product_track", "pages", "screens", "aliases", "groups"}, - sourceID: s3SourceID, - destinationID: s3DestinationID, - destType: warehouseutils.S3Datalake, + name: "S3Datalake", + tables: []string{"identifies", "users", "tracks", "product_track", "pages", "screens", "aliases", "groups"}, + destType: whutils.S3Datalake, conf: map[string]interface{}{ "region": s3Region, "bucketName": s3BucketName, @@ -183,18 +96,25 @@ func TestIntegration(t *testing.T) { "prefix": "some-prefix", "syncFrequency": "30", }, - prerequisite: func(t testing.TB) { + prerequisite: func(t testing.TB, ctx context.Context) { createMinioBucket(t, ctx, s3EndPoint, s3AccessKeyID, s3AccessKey, s3BucketName, s3Region) }, stagingFilePrefix: "testdata/upload-job-s3-datalake", + configOverride: map[string]any{ + "region": s3Region, + "bucketName": s3BucketName, + "accessKeyID": s3AccessKeyID, + "accessKey": s3AccessKey, + "endPoint": s3EndPoint, + "enableSSE": false, + "s3ForcePathStyle": true, + "disableSSL": true, + }, }, { - name: "GCSDatalake", - writeKey: gcsWriteKey, - tables: []string{"identifies", "users", "tracks", "product_track", "pages", "screens", "aliases", "_groups"}, - sourceID: gcsSourceID, - destinationID: gcsDestinationID, - destType: warehouseutils.GCSDatalake, + name: "GCSDatalake", + tables: []string{"identifies", "users", "tracks", "product_track", "pages", "screens", "aliases", "_groups"}, + destType: whutils.GCSDatalake, conf: map[string]interface{}{ "bucketName": gcsBucketName, "prefix": "", @@ -203,18 +123,21 @@ func TestIntegration(t *testing.T) { "jsonReads": true, "syncFrequency": "30", }, - prerequisite: func(t testing.TB) { + prerequisite: func(t testing.TB, ctx context.Context) { createGCSBucket(t, ctx, gcsEndPoint, gcsBucketName) }, stagingFilePrefix: "testdata/upload-job-gcs-datalake", + configOverride: map[string]any{ + "bucketName": gcsBucketName, + "endPoint": gcsEndPoint, + "disableSSL": true, + "jsonReads": true, + }, }, { - name: "AzureDatalake", - writeKey: azWriteKey, - tables: []string{"identifies", "users", "tracks", "product_track", "pages", "screens", "aliases", "groups"}, - sourceID: azSourceID, - destinationID: azDestinationID, - destType: warehouseutils.AzureDatalake, + name: "AzureDatalake", + tables: []string{"identifies", "users", "tracks", "product_track", "pages", "screens", "aliases", "groups"}, + destType: whutils.AzureDatalake, conf: 
map[string]interface{}{ "containerName": azContainerName, "prefix": "", @@ -226,48 +149,89 @@ func TestIntegration(t *testing.T) { "disableSSL": true, }, stagingFilePrefix: "testdata/upload-job-azure-datalake", + configOverride: map[string]any{ + "containerName": azContainerName, + "accountName": azAccountName, + "accountKey": azAccountKey, + "endPoint": azEndPoint, + "forcePathStyle": true, + "disableSSL": true, + }, }, } - jobsDB := testhelper.JobsDB(t, jobsDBPort) - for _, tc := range testCases { - tc := tc - t.Run(tc.name, func(t *testing.T) { + var ( + sourceID = whutils.RandHex() + destinationID = whutils.RandHex() + writeKey = whutils.RandHex() + namespace = whth.RandSchema(tc.destType) + ) + + destinationBuilder := backendconfigtest.NewDestinationBuilder(tc.destType). + WithID(destinationID). + WithRevisionID(destinationID). + WithConfigOption("namespace", namespace). + WithConfigOption("syncFrequency", "30") + for k, v := range tc.configOverride { + destinationBuilder = destinationBuilder.WithConfigOption(k, v) + } + + workspaceConfig := backendconfigtest.NewConfigBuilder(). + WithSource( + backendconfigtest.NewSourceBuilder(). + WithID(sourceID). + WithWriteKey(writeKey). + WithWorkspaceID(workspaceID). + WithConnection(destinationBuilder.Build()). + Build(), + ). + WithWorkspaceID(workspaceID). + Build() + + t.Setenv("STORAGE_EMULATOR_HOST", fmt.Sprintf("localhost:%d", c.Port("gcs", 4443))) + t.Setenv("RSERVER_WORKLOAD_IDENTITY_TYPE", "GKE") + + whth.BootstrapSvc(t, workspaceConfig, httpPort, jobsDBPort) + + ctx := context.Background() + if tc.prerequisite != nil { - tc.prerequisite(t) + tc.prerequisite(t, ctx) } t.Log("verifying test case 1") - ts1 := testhelper.TestConfig{ - WriteKey: tc.writeKey, + ts1 := whth.TestConfig{ + WriteKey: writeKey, + Schema: namespace, Tables: tc.tables, - SourceID: tc.sourceID, - DestinationID: tc.destinationID, + SourceID: sourceID, + DestinationID: destinationID, DestinationType: tc.destType, Config: tc.conf, WorkspaceID: workspaceID, JobsDB: jobsDB, HTTPPort: httpPort, - UserID: testhelper.GetUserId(tc.destType), + UserID: whth.GetUserId(tc.destType), SkipWarehouse: true, StagingFilePath: tc.stagingFilePrefix + ".staging-1.json", } ts1.VerifyEvents(t) t.Log("verifying test case 2") - ts2 := testhelper.TestConfig{ - WriteKey: tc.writeKey, + ts2 := whth.TestConfig{ + WriteKey: writeKey, + Schema: namespace, Tables: tc.tables, - SourceID: tc.sourceID, - DestinationID: tc.destinationID, + SourceID: sourceID, + DestinationID: destinationID, DestinationType: tc.destType, Config: tc.conf, WorkspaceID: workspaceID, JobsDB: jobsDB, HTTPPort: httpPort, - UserID: testhelper.GetUserId(tc.destType), + UserID: whth.GetUserId(tc.destType), SkipWarehouse: true, StagingFilePath: tc.stagingFilePrefix + ".staging-2.json", } @@ -276,11 +240,179 @@ func TestIntegration(t *testing.T) { } }) - t.Run("S3 DataLake Validation", func(t *testing.T) { + t.Run("Validations", func(t *testing.T) { + t.Run("S3 DataLake", func(t *testing.T) { + c := testcompose.New(t, compose.FilePaths([]string{"../testdata/docker-compose.minio.yml"})) + c.Start(context.Background()) + + s3EndPoint := fmt.Sprintf("localhost:%d", c.Port("minio", 9000)) + + ctx := context.Background() + namespace := whth.RandSchema(whutils.S3Datalake) + + createMinioBucket(t, ctx, s3EndPoint, s3AccessKeyID, s3AccessKey, s3BucketName, s3Region) + + dest := backendconfig.DestinationT{ + ID: "test_destination_id", + Config: map[string]interface{}{ + "region": s3Region, + "bucketName": s3BucketName, + "accessKeyID": 
s3AccessKeyID, + "accessKey": s3AccessKey, + "endPoint": s3EndPoint, + "namespace": namespace, + "enableSSE": false, + "s3ForcePathStyle": true, + "disableSSL": true, + "prefix": "some-prefix", + "syncFrequency": "30", + }, + DestinationDefinition: backendconfig.DestinationDefinitionT{ + ID: "1xAu2vuR0scUwkBivf6VhqwWgcS", + Name: "S3_DATALAKE", + DisplayName: "S3 Datalake", + }, + Name: "s3-datalake-demo", + Enabled: true, + RevisionID: "29HgOWobnr0RYZLpaSwPINb2987", + } + whth.VerifyConfigurationTest(t, dest) + }) + + t.Run("GCS DataLake", func(t *testing.T) { + c := testcompose.New(t, compose.FilePaths([]string{"testdata/docker-compose.gcs.yml"})) + c.Start(context.Background()) + + gcsEndPoint := fmt.Sprintf("http://localhost:%d/storage/v1/", c.Port("gcs", 4443)) + + ctx := context.Background() + namespace := whth.RandSchema(whutils.GCSDatalake) + + t.Setenv("STORAGE_EMULATOR_HOST", fmt.Sprintf("localhost:%d", c.Port("gcs", 4443))) + t.Setenv("RSERVER_WORKLOAD_IDENTITY_TYPE", "GKE") + + createGCSBucket(t, ctx, gcsEndPoint, gcsBucketName) + + dest := backendconfig.DestinationT{ + ID: "test_destination_id", + Config: map[string]interface{}{ + "bucketName": gcsBucketName, + "prefix": "", + "endPoint": gcsEndPoint, + "namespace": namespace, + "disableSSL": true, + "jsonReads": true, + "syncFrequency": "30", + }, + DestinationDefinition: backendconfig.DestinationDefinitionT{ + ID: "20lzWVRwzEimkq87sNQuz1or2GA", + Name: "GCS_DATALAKE", + DisplayName: "Google Cloud Storage Datalake", + }, + Name: "gcs-datalake-demo", + Enabled: true, + RevisionID: "29HgOWobnr0RYZpLASwPINb2987", + } + whth.VerifyConfigurationTest(t, dest) + }) + + t.Run("Azure DataLake", func(t *testing.T) { + c := testcompose.New(t, compose.FilePaths([]string{"testdata/docker-compose.azure.yml"})) + c.Start(context.Background()) + + azEndPoint := fmt.Sprintf("localhost:%d", c.Port("azure", 10000)) + + namespace := whth.RandSchema(whutils.AzureDatalake) + + dest := backendconfig.DestinationT{ + ID: "test_destination_id", + Config: map[string]interface{}{ + "containerName": azContainerName, + "prefix": "", + "accountName": azAccountName, + "accountKey": azAccountKey, + "endPoint": azEndPoint, + "namespace": namespace, + "syncFrequency": "30", + "forcePathStyle": true, + "disableSSL": true, + }, + DestinationDefinition: backendconfig.DestinationDefinitionT{ + ID: "20lzXg0c5kCBRxGoOoKjCSyZ3AC", + Name: "AZURE_DATALAKE", + DisplayName: "Azure Datalake", + }, + Name: "azure-datalake-demo", + Enabled: true, + RevisionID: "29HgOWobnr0RYZLpaSwPIbN2987", + } + whth.VerifyConfigurationTest(t, dest) + }) + }) + + t.Run("Trino", func(t *testing.T) { + httpPort, err := kithelper.GetFreePort() + require.NoError(t, err) + + c := testcompose.New(t, compose.FilePaths([]string{"testdata/docker-compose.trino.yml", "testdata/docker-compose.hive-metastore.yml", "../testdata/docker-compose.jobsdb.yml", "../testdata/docker-compose.minio.yml"})) + c.Start(context.Background()) + + destType := whutils.S3Datalake + + workspaceID := whutils.RandHex() + jobsDBPort := c.Port("jobsDb", 5432) + s3EndPoint := fmt.Sprintf("localhost:%d", c.Port("minio", 9000)) + + jobsDB := whth.JobsDB(t, jobsDBPort) + + ctx := context.Background() + sourceID := whutils.RandHex() + destinationID := whutils.RandHex() + writeKey := whutils.RandHex() + namespace := whth.RandSchema(destType) + + workspaceConfig := backendconfigtest.NewConfigBuilder(). + WithSource( + backendconfigtest.NewSourceBuilder(). + WithID(sourceID). + WithWriteKey(writeKey). + WithWorkspaceID(workspaceID). 
+ WithConnection( + backendconfigtest.NewDestinationBuilder(destType). + WithID(destinationID). + WithRevisionID(destinationID). + WithConfigOption("namespace", namespace). + WithConfigOption("syncFrequency", "30"). + WithConfigOption("region", s3Region). + WithConfigOption("bucketName", s3BucketName). + WithConfigOption("accessKeyID", s3AccessKeyID). + WithConfigOption("accessKey", s3AccessKey). + WithConfigOption("endPoint", s3EndPoint). + WithConfigOption("enableSSE", false). + WithConfigOption("s3ForcePathStyle", true). + WithConfigOption("disableSSL", true). + WithConfigOption("prefix", "some-prefix").Build(), + ). + Build(), + ). + WithWorkspaceID(workspaceID). + Build() + + whth.BootstrapSvc(t, workspaceConfig, httpPort, jobsDBPort) + createMinioBucket(t, ctx, s3EndPoint, s3AccessKeyID, s3AccessKey, s3BucketName, s3Region) - dest := backendconfig.DestinationT{ - ID: s3DestinationID, + ts := whth.TestConfig{ + WriteKey: writeKey, + Schema: namespace, + Tables: []string{"tracks"}, + StagingFilesEventsMap: whth.EventsCountMap{"wh_staging_files": 8}, + LoadFilesEventsMap: map[string]int{"tracks": 8}, + TableUploadsEventsMap: map[string]int{"tracks": 8}, + WarehouseEventsMap: map[string]int{"tracks": 8}, + SourceID: sourceID, + DestinationID: destinationID, + DestinationType: destType, Config: map[string]interface{}{ "region": s3Region, "bucketName": s3BucketName, @@ -293,74 +425,23 @@ func TestIntegration(t *testing.T) { "prefix": "some-prefix", "syncFrequency": "30", }, - DestinationDefinition: backendconfig.DestinationDefinitionT{ - ID: "1xAu2vuR0scUwkBivf6VhqwWgcS", - Name: "S3_DATALAKE", - DisplayName: "S3 Datalake", - }, - Name: "s3-datalake-demo", - Enabled: true, - RevisionID: "29HgOWobnr0RYZLpaSwPINb2987", + WorkspaceID: workspaceID, + JobsDB: jobsDB, + HTTPPort: httpPort, + UserID: whth.GetUserId(destType), + SkipWarehouse: true, + StagingFilePath: "testdata/trino.staging.json", } - testhelper.VerifyConfigurationTest(t, dest) - }) - - t.Run("GCS DataLake Validation", func(t *testing.T) { - createGCSBucket(t, ctx, gcsEndPoint, gcsBucketName) + ts.VerifyEvents(t) - dest := backendconfig.DestinationT{ - ID: gcsDestinationID, - Config: map[string]interface{}{ - "bucketName": gcsBucketName, - "prefix": "", - "endPoint": gcsEndPoint, - "disableSSL": true, - "jsonReads": true, - "syncFrequency": "30", - }, - DestinationDefinition: backendconfig.DestinationDefinitionT{ - ID: "20lzWVRwzEimkq87sNQuz1or2GA", - Name: "GCS_DATALAKE", - DisplayName: "Google Cloud Storage Datalake", - }, - Name: "gcs-datalake-demo", - Enabled: true, - RevisionID: "29HgOWobnr0RYZpLASwPINb2987", - } - testhelper.VerifyConfigurationTest(t, dest) - }) - - t.Run("Azure DataLake Validation", func(t *testing.T) { - dest := backendconfig.DestinationT{ - ID: azDestinationID, - Config: map[string]interface{}{ - "containerName": azContainerName, - "prefix": "", - "accountName": azAccountName, - "accountKey": azAccountKey, - "endPoint": azEndPoint, - "syncFrequency": "30", - "forcePathStyle": true, - "disableSSL": true, - }, - DestinationDefinition: backendconfig.DestinationDefinitionT{ - ID: "20lzXg0c5kCBRxGoOoKjCSyZ3AC", - Name: "AZURE_DATALAKE", - DisplayName: "Azure Datalake", - }, - Name: "azure-datalake-demo", - Enabled: true, - RevisionID: "29HgOWobnr0RYZLpaSwPIbN2987", - } - testhelper.VerifyConfigurationTest(t, dest) - }) - - t.Run("Trino", func(t *testing.T) { dsn := fmt.Sprintf("http://user@localhost:%d?catalog=minio&schema=default&session_properties=minio.parquet_use_column_index=true", - trinoPort, + 
c.Port("trino", 8080), ) db, err := sql.Open("trino", dsn) require.NoError(t, err) + t.Cleanup(func() { + _ = db.Close() + }) require.Eventually(t, func() bool { _, err := db.ExecContext(ctx, `SELECT 1`) @@ -369,7 +450,6 @@ func TestIntegration(t *testing.T) { 60*time.Second, 100*time.Millisecond, ) - require.Eventually(t, func() bool { _, err = db.ExecContext(ctx, ` CREATE SCHEMA IF NOT EXISTS minio.rudderstack WITH ( @@ -384,7 +464,6 @@ func TestIntegration(t *testing.T) { 60*time.Second, 1*time.Second, ) - require.Eventually(t, func() bool { _, err = db.ExecContext(ctx, ` CREATE TABLE IF NOT EXISTS minio.rudderstack.tracks ( @@ -408,7 +487,7 @@ func TestIntegration(t *testing.T) { uuid_ts TIMESTAMP ) WITH ( - external_location = 's3a://`+s3BucketName+`/some-prefix/rudder-datalake/s_3_datalake_integration/tracks/2023/05/12/04/', + external_location = 's3a://`+s3BucketName+`/some-prefix/rudder-datalake/`+namespace+`/tracks/2023/05/12/04/', format = 'PARQUET' ) `) @@ -423,7 +502,6 @@ func TestIntegration(t *testing.T) { ) var count int64 - require.Eventually(t, func() bool { err := db.QueryRowContext(ctx, ` select @@ -449,7 +527,7 @@ func TestIntegration(t *testing.T) { from minio.rudderstack.tracks where - context_destination_id = '`+s3DestinationID+`' + context_destination_id = '`+destinationID+`' `).Scan(&count) if err != nil { t.Log("select count with where clause: ", err) @@ -464,6 +542,89 @@ func TestIntegration(t *testing.T) { }) t.Run("Spark", func(t *testing.T) { + httpPort, err := kithelper.GetFreePort() + require.NoError(t, err) + + c := testcompose.New(t, compose.FilePaths([]string{"testdata/docker-compose.spark.yml", "testdata/docker-compose.hive-metastore.yml", "../testdata/docker-compose.jobsdb.yml", "../testdata/docker-compose.minio.yml"})) + c.Start(context.Background()) + + destType := whutils.S3Datalake + + workspaceID := whutils.RandHex() + jobsDBPort := c.Port("jobsDb", 5432) + s3EndPoint := fmt.Sprintf("localhost:%d", c.Port("minio", 9000)) + + jobsDB := whth.JobsDB(t, jobsDBPort) + + ctx := context.Background() + sourceID := whutils.RandHex() + destinationID := whutils.RandHex() + writeKey := whutils.RandHex() + namespace := whth.RandSchema(destType) + + workspaceConfig := backendconfigtest.NewConfigBuilder(). + WithSource( + backendconfigtest.NewSourceBuilder(). + WithID(sourceID). + WithWriteKey(writeKey). + WithWorkspaceID(workspaceID). + WithConnection( + backendconfigtest.NewDestinationBuilder(destType). + WithID(destinationID). + WithRevisionID(destinationID). + WithConfigOption("namespace", namespace). + WithConfigOption("syncFrequency", "30"). + WithConfigOption("region", s3Region). + WithConfigOption("bucketName", s3BucketName). + WithConfigOption("accessKeyID", s3AccessKeyID). + WithConfigOption("accessKey", s3AccessKey). + WithConfigOption("endPoint", s3EndPoint). + WithConfigOption("enableSSE", false). + WithConfigOption("s3ForcePathStyle", true). + WithConfigOption("disableSSL", true). + WithConfigOption("prefix", "some-prefix").Build(), + ). + Build(), + ). + WithWorkspaceID(workspaceID). 
+ Build() + + whth.BootstrapSvc(t, workspaceConfig, httpPort, jobsDBPort) + + createMinioBucket(t, ctx, s3EndPoint, s3AccessKeyID, s3AccessKey, s3BucketName, s3Region) + + ts := whth.TestConfig{ + WriteKey: writeKey, + Schema: namespace, + Tables: []string{"tracks"}, + StagingFilesEventsMap: whth.EventsCountMap{"wh_staging_files": 8}, + LoadFilesEventsMap: map[string]int{"tracks": 8}, + TableUploadsEventsMap: map[string]int{"tracks": 8}, + WarehouseEventsMap: map[string]int{"tracks": 8}, + SourceID: sourceID, + DestinationID: destinationID, + DestinationType: destType, + Config: map[string]interface{}{ + "region": s3Region, + "bucketName": s3BucketName, + "accessKeyID": s3AccessKeyID, + "accessKey": s3AccessKey, + "endPoint": s3EndPoint, + "enableSSE": false, + "s3ForcePathStyle": true, + "disableSSL": true, + "prefix": "some-prefix", + "syncFrequency": "30", + }, + WorkspaceID: workspaceID, + JobsDB: jobsDB, + HTTPPort: httpPort, + UserID: whth.GetUserId(destType), + SkipWarehouse: true, + StagingFilePath: "testdata/spark.staging.json", + } + ts.VerifyEvents(t) + _ = c.Exec(ctx, "spark-master", "spark-sql", @@ -489,7 +650,7 @@ func TestIntegration(t *testing.T) { uuid_ts timestamp ) STORED AS PARQUET - location "s3a://some-bucket/some-prefix/rudder-datalake/s_3_datalake_integration/tracks/2023/05/12/04/"; + location "s3a://`+s3BucketName+`/some-prefix/rudder-datalake/`+namespace+`/tracks/2023/05/12/04/"; `, "-S", ) @@ -520,7 +681,7 @@ func TestIntegration(t *testing.T) { from tracks where - context_destination_id = '`+s3DestinationID+`'; + context_destination_id = '`+destinationID+`'; `, "-S", ) diff --git a/warehouse/integrations/datalake/testdata/docker-compose.yml b/warehouse/integrations/datalake/testdata/docker-compose.azure.yml similarity index 67% rename from warehouse/integrations/datalake/testdata/docker-compose.yml rename to warehouse/integrations/datalake/testdata/docker-compose.azure.yml index 5c87c106fd4..0741a012e44 100644 --- a/warehouse/integrations/datalake/testdata/docker-compose.yml +++ b/warehouse/integrations/datalake/testdata/docker-compose.azure.yml @@ -12,9 +12,3 @@ services: test: nc -z 0.0.0.0 10000 || exit 1 interval: 1s retries: 25 - - gcs: - image: fsouza/fake-gcs-server:latest - ports: - - "4443" - command: [ "-scheme", "http", "-location", "us-east-1", "-backend", "memory" ] diff --git a/warehouse/integrations/datalake/testdata/docker-compose.gcs.yml b/warehouse/integrations/datalake/testdata/docker-compose.gcs.yml new file mode 100644 index 00000000000..d92920f47d6 --- /dev/null +++ b/warehouse/integrations/datalake/testdata/docker-compose.gcs.yml @@ -0,0 +1,8 @@ +version: "3.9" + +services: + gcs: + image: fsouza/fake-gcs-server:latest + ports: + - "4443" + command: [ "-scheme", "http", "-location", "us-east-1", "-backend", "memory" ] diff --git a/warehouse/integrations/datalake/testdata/docker-compose.hive-metastore.yml b/warehouse/integrations/datalake/testdata/docker-compose.hive-metastore.yml new file mode 100644 index 00000000000..ecd91ac85cf --- /dev/null +++ b/warehouse/integrations/datalake/testdata/docker-compose.hive-metastore.yml @@ -0,0 +1,18 @@ +version: '3.7' +services: + hive-metastore: + image: rudderstack/hive-metastore:latest + ports: + - "9083" + volumes: + - ./conf/metastore-site.xml:/opt/apache-hive-metastore-3.0.0-bin/conf/metastore-site.xml:ro + environment: + METASTORE_DB_HOSTNAME: jobsDb + METASTORE_TYPE: postgres + depends_on: + - jobsDb + deploy: + resources: + limits: + cpus: '1' + memory: 1G diff --git 
a/warehouse/integrations/datalake/testdata/docker-compose.trino.yml b/warehouse/integrations/datalake/testdata/docker-compose.trino.yml index a242fc5b892..8ee8a5c563d 100644 --- a/warehouse/integrations/datalake/testdata/docker-compose.trino.yml +++ b/warehouse/integrations/datalake/testdata/docker-compose.trino.yml @@ -19,20 +19,3 @@ services: timeout: 5s start_period: 10s retries: 25 - - hive-metastore: - image: rudderstack/hive-metastore:latest - ports: - - "9083" - volumes: - - ./conf/metastore-site.xml:/opt/apache-hive-metastore-3.0.0-bin/conf/metastore-site.xml:ro - environment: - METASTORE_DB_HOSTNAME: jobsDb - METASTORE_TYPE: postgres - depends_on: - - jobsDb - deploy: - resources: - limits: - cpus: '1' - memory: 1G diff --git a/warehouse/integrations/datalake/testdata/spark.staging.json b/warehouse/integrations/datalake/testdata/spark.staging.json new file mode 100644 index 00000000000..085cda2e0a2 --- /dev/null +++ b/warehouse/integrations/datalake/testdata/spark.staging.json @@ -0,0 +1,8 @@ +{"data": {"id": "920add93-60fe-4b72-b8a2-785633e24e73", "event": "product_track", "sent_at": "2023-05-12T04:29:04.211Z", "user_id": "{{.userID}}", "_timestamp": "2023-05-12T04:29:00.167Z", "context_ip": "[::1]", "event_text": "Product Track", "received_at": "2023-05-12T04:29:00.168Z", "context_source_id": "{{.sourceID}}", "context_request_ip": "[::1]", "original_timestamp": "2023-05-12T04:29:04.211Z", "context_source_type": "HTTP", "context_destination_id": "{{.destID}}", "context_destination_type": "S3_DATALAKE"}, "userId": "", "metadata": {"table": "tracks", "columns": {"id": "string", "event": "string", "sent_at": "datetime", "user_id": "string", "uuid_ts": "datetime", "_timestamp": "datetime", "context_ip": "string", "event_text": "string", "received_at": "datetime", "context_source_id": "string", "context_request_ip": "string", "original_timestamp": "datetime", "context_source_type": "string", "context_destination_id": "string", "context_destination_type": "string"}, "receivedAt": "2023-05-12T09:59:00.168+05:30"}} +{"data": {"id": "213f65b4-e283-41ae-9df5-bd84592aa6c6", "event": "product_track", "sent_at": "2023-05-12T04:29:04.211Z", "user_id": "{{.userID}}", "_timestamp": "2023-05-12T04:29:00.455Z", "context_ip": "[::1]", "event_text": "Product Track", "received_at": "2023-05-12T04:29:00.456Z", "context_source_id": "{{.sourceID}}", "context_request_ip": "[::1]", "original_timestamp": "2023-05-12T04:29:04.211Z", "context_source_type": "HTTP", "context_destination_id": "{{.destID}}", "context_destination_type": "S3_DATALAKE"}, "userId": "", "metadata": {"table": "tracks", "columns": {"id": "string", "event": "string", "sent_at": "datetime", "user_id": "string", "uuid_ts": "datetime", "_timestamp": "datetime", "context_ip": "string", "event_text": "string", "received_at": "datetime", "context_source_id": "string", "context_request_ip": "string", "original_timestamp": "datetime", "context_source_type": "string", "context_destination_id": "string", "context_destination_type": "string"}, "receivedAt": "2023-05-12T09:59:00.456+05:30"}} +{"data": {"id": "f56ddbcc-f8d8-49d8-b69f-d9236dd2397c", "event": "product_track", "sent_at": "2023-05-12T04:29:04.212Z", "user_id": "{{.userID}}", "_timestamp": "2023-05-12T04:29:00.822Z", "context_ip": "[::1]", "event_text": "Product Track", "received_at": "2023-05-12T04:29:00.823Z", "context_source_id": "{{.sourceID}}", "context_request_ip": "[::1]", "original_timestamp": "2023-05-12T04:29:04.212Z", "context_source_type": "HTTP", "context_destination_id": 
"{{.destID}}", "context_destination_type": "S3_DATALAKE"}, "userId": "", "metadata": {"table": "tracks", "columns": {"id": "string", "event": "string", "sent_at": "datetime", "user_id": "string", "uuid_ts": "datetime", "_timestamp": "datetime", "context_ip": "string", "event_text": "string", "received_at": "datetime", "context_source_id": "string", "context_request_ip": "string", "original_timestamp": "datetime", "context_source_type": "string", "context_destination_id": "string", "context_destination_type": "string"}, "receivedAt": "2023-05-12T09:59:00.823+05:30"}} +{"data": {"id": "3ba02184-51f1-4382-bd46-b1b499ff999d", "event": "product_track", "sent_at": "2023-05-12T04:29:04.213Z", "user_id": "{{.userID}}", "timestamp": "2023-05-12T04:29:01.250Z", "context_ip": "[::1]", "event_text": "Product Track", "received_at": "2023-05-12T04:29:01.251Z", "context_source_id": "{{.sourceID}}", "context_request_ip": "[::1]", "original_timestamp": "2023-05-12T04:29:04.213Z", "context_source_type": "HTTP", "context_destination_id": "{{.destID}}", "context_destination_type": "S3_DATALAKE"}, "userId": "", "metadata": {"table": "tracks", "columns": {"id": "string", "event": "string", "sent_at": "datetime", "user_id": "string", "uuid_ts": "datetime", "timestamp": "datetime", "context_ip": "string", "event_text": "string", "received_at": "datetime", "context_source_id": "string", "context_request_ip": "string", "original_timestamp": "datetime", "context_source_type": "string", "context_destination_id": "string", "context_destination_type": "string"}, "receivedAt": "2023-05-12T09:59:01.251+05:30"}} +{"data": {"id": "9b9c4cba-9f08-4283-8f43-e7f1a7fd3589", "event": "product_track", "sent_at": "2023-05-12T04:29:37.772Z", "user_id": "{{.userID}}", "_timestamp": "2023-05-12T04:29:36.953Z", "context_ip": "14.5.67.21", "event_text": "Product Track", "received_at": "2023-05-12T04:29:36.954Z", "context_passed_ip": "14.5.67.21", "context_source_id": "{{.sourceID}}", "context_request_ip": "[::1]", "original_timestamp": "2023-05-12T04:29:37.772Z", "context_source_type": "HTTP", "context_library_name": "http", "context_destination_id": "{{.destID}}", "context_destination_type": "S3_DATALAKE"}, "userId": "", "metadata": {"table": "tracks", "columns": {"id": "string", "event": "string", "sent_at": "datetime", "user_id": "string", "uuid_ts": "datetime", "_timestamp": "datetime", "context_ip": "string", "event_text": "string", "received_at": "datetime", "context_passed_ip": "string", "context_source_id": "string", "context_request_ip": "string", "original_timestamp": "datetime", "context_source_type": "string", "context_library_name": "string", "context_destination_id": "string", "context_destination_type": "string"}, "receivedAt": "2023-05-12T09:59:36.954+05:30"}} +{"data": {"id": "1b49518c-671c-43f3-800f-5e44c1aae772", "event": "product_track", "sent_at": "2023-05-12T04:29:37.773Z", "user_id": "{{.userID}}", "_timestamp": "2023-05-12T04:29:37.228Z", "context_ip": "14.5.67.21", "event_text": "Product Track", "received_at": "2023-05-12T04:29:37.229Z", "context_passed_ip": "14.5.67.21", "context_source_id": "{{.sourceID}}", "context_request_ip": "[::1]", "original_timestamp": "2023-05-12T04:29:37.773Z", "context_source_type": "HTTP", "context_library_name": "http", "context_destination_id": "{{.destID}}", "context_destination_type": "S3_DATALAKE"}, "userId": "", "metadata": {"table": "tracks", "columns": {"id": "string", "event": "string", "sent_at": "datetime", "user_id": "string", "uuid_ts": "datetime", "_timestamp": 
"datetime", "context_ip": "string", "event_text": "string", "received_at": "datetime", "context_passed_ip": "string", "context_source_id": "string", "context_request_ip": "string", "original_timestamp": "datetime", "context_source_type": "string", "context_library_name": "string", "context_destination_id": "string", "context_destination_type": "string"}, "receivedAt": "2023-05-12T09:59:37.229+05:30"}} +{"data": {"id": "6cc21bde-c601-4b78-9eaa-b26c3fe4e808", "event": "product_track", "sent_at": "2023-05-12T04:29:38.607Z", "user_id": "{{.userID}}", "_timestamp": "2023-05-12T04:29:37.671Z", "context_ip": "14.5.67.21", "event_text": "Product Track", "received_at": "2023-05-12T04:29:37.672Z", "context_passed_ip": "14.5.67.21", "context_source_id": "{{.sourceID}}", "context_request_ip": "[::1]", "original_timestamp": "2023-05-12T04:29:38.607Z", "context_source_type": "HTTP", "context_library_name": "http", "context_destination_id": "{{.destID}}", "context_destination_type": "S3_DATALAKE"}, "userId": "", "metadata": {"table": "tracks", "columns": {"id": "string", "event": "string", "sent_at": "datetime", "user_id": "string", "uuid_ts": "datetime", "_timestamp": "datetime", "context_ip": "string", "event_text": "string", "received_at": "datetime", "context_passed_ip": "string", "context_source_id": "string", "context_request_ip": "string", "original_timestamp": "datetime", "context_source_type": "string", "context_library_name": "string", "context_destination_id": "string", "context_destination_type": "string"}, "receivedAt": "2023-05-12T09:59:37.672+05:30"}} +{"data": {"id": "1a922433-2a6d-47a3-95bb-1b7415f45bc5", "event": "product_track", "sent_at": "2023-05-12T04:29:38.608Z", "user_id": "{{.userID}}", "timestamp": "2023-05-12T04:29:38.052Z", "context_ip": "[::1]", "event_text": "Product Track", "received_at": "2023-05-12T04:29:38.053Z", "context_source_id": "{{.sourceID}}", "context_request_ip": "[::1]", "original_timestamp": "2023-05-12T04:29:38.608Z", "context_source_type": "HTTP", "context_destination_id": "{{.destID}}", "context_destination_type": "S3_DATALAKE"}, "userId": "", "metadata": {"table": "tracks", "columns": {"id": "string", "event": "string", "sent_at": "datetime", "user_id": "string", "uuid_ts": "datetime", "timestamp": "datetime", "context_ip": "string", "event_text": "string", "received_at": "datetime", "context_source_id": "string", "context_request_ip": "string", "original_timestamp": "datetime", "context_source_type": "string", "context_destination_id": "string", "context_destination_type": "string"}, "receivedAt": "2023-05-12T09:59:38.053+05:30"}} diff --git a/warehouse/integrations/datalake/testdata/template.json b/warehouse/integrations/datalake/testdata/template.json deleted file mode 100644 index 0f1ed9347e9..00000000000 --- a/warehouse/integrations/datalake/testdata/template.json +++ /dev/null @@ -1,327 +0,0 @@ -{ - "enableMetrics": false, - "workspaceId": "{{.workspaceID}}", - "sources": [ - { - "config": { - "isSampleSource": true, - "eventUpload": false, - "eventUploadTS": 1653346591988 - }, - "liveEventsConfig": { - "eventUpload": false, - "eventUploadTS": 1653346591988 - }, - "id": "{{.azSourceID}}", - "name": "azure-datalake-integration", - "writeKey": "{{.azWriteKey}}", - "enabled": true, - "sourceDefinitionId": "1dCzCUAtpWDzNxgGUYzq9sZdZZB", - "createdBy": "279Kz3NhcXsUAx2KHnx1HKhdtbk", - "workspaceId": "{{.workspaceID}}", - "deleted": false, - "createdAt": "2022-03-31T11:45:57.339Z", - "updatedAt": "2022-05-23T22:56:32.000Z", - "destinations": [ - { - 
"config": { - "containerName": "{{.azContainerName}}", - "prefix": "", - "accountName": "{{.azAccountName}}", - "accountKey": "{{.azAccountKey}}", - "endPoint": "{{.azEndpoint}}", - "syncFrequency": "30", - "forcePathStyle": true, - "disableSSL": true - }, - "liveEventsConfig": {}, - "secretConfig": {}, - "id": "{{.azDestinationID}}", - "name": "azure-datalake-demo", - "enabled": true, - "workspaceId": "{{.workspaceID}}", - "deleted": false, - "createdAt": "2022-04-07T09:57:14.783Z", - "updatedAt": "2022-05-17T08:16:32.613Z", - "revisionId": "{{.azDestinationID}}", - "transformations": [], - "destinationDefinition": { - "config": { - "destConfig": { - "defaultConfig": [ - "containerName", - "prefix", - "namespace", - "accountName", - "accountKey", - "useSASTokens", - "sasToken", - "syncFrequency", - "syncStartAt" - ] - }, - "secretKeys": [ - "accountKey", - "sasToken" - ], - "excludeKeys": [], - "includeKeys": [], - "transformAt": "processor", - "transformAtV1": "processor", - "supportedSourceTypes": [ - "android", - "ios", - "web", - "unity", - "amp", - "cloud", - "warehouse", - "reactnative", - "cloudSource", - "flutter", - "cordova" - ], - "saveDestinationResponse": true - }, - "responseRules": {}, - "options": null, - "uiConfig": null, - "id": "20lzXg0c5kCBRxGoOoKjCSyZ3AC", - "name": "AZURE_DATALAKE", - "displayName": "Azure Datalake", - "category": "warehouse", - "createdAt": "2021-11-11T12:33:22.355Z", - "updatedAt": "2022-09-01T15:23:38.781Z" - }, - "isConnectionEnabled": true, - "isProcessorEnabled": true - } - ], - "sourceDefinition": { - "options": null, - "id": "1dCzCUAtpWDzNxgGUYzq9sZdZZB", - "name": "HTTP", - "displayName": "HTTP", - "category": "", - "createdAt": "2020-06-12T06:35:35.962Z", - "updatedAt": "2020-06-12T06:35:35.962Z" - }, - "dgSourceTrackingPlanConfig": null - }, - { - "config": { - "isSampleSource": true, - "eventUpload": false, - "eventUploadTS": 1653346591988 - }, - "liveEventsConfig": { - "eventUpload": false, - "eventUploadTS": 1653346591988 - }, - "id": "{{.s3SourceID}}", - "name": "s3-datalake-integration", - "writeKey": "{{.s3WriteKey}}", - "enabled": true, - "sourceDefinitionId": "1dCzCUAtpWDzNxgGUYzq9sZdZZB", - "createdBy": "279Kz3NhcXsUAx2KHnx1HKhdtbk", - "workspaceId": "{{.workspaceID}}", - "deleted": false, - "createdAt": "2022-03-31T11:45:57.339Z", - "updatedAt": "2022-05-23T22:56:32.000Z", - "destinations": [ - { - "config": { - "region": "{{.s3Region}}", - "bucketName": "{{.s3BucketName}}", - "accessKeyID": "{{.s3AccessKeyID}}", - "accessKey": "{{.s3AccessKey}}", - "endPoint": "{{.s3EndPoint}}", - "enableSSE": false, - "s3ForcePathStyle": true, - "disableSSL": true, - "prefix": "some-prefix", - "syncFrequency": "30" - }, - "liveEventsConfig": {}, - "secretConfig": {}, - "id": "{{.s3DestinationID}}", - "name": "s3-datalake-demo", - "enabled": true, - "workspaceId": "{{.workspaceID}}", - "deleted": false, - "createdAt": "2022-04-07T09:57:14.783Z", - "updatedAt": "2022-05-17T08:16:32.613Z", - "revisionId": "{{.s3DestinationID}}", - "transformations": [], - "destinationDefinition": { - "config": { - "destConfig": { - "defaultConfig": [ - "useGlue", - "region", - "bucketName", - "accessKeyID", - "accessKey", - "prefix", - "namespace", - "syncFrequency", - "syncStartAt", - "excludeWindow" - ] - }, - "secretKeys": [ - "password", - "accessKeyID", - "accessKey" - ], - "excludeKeys": [], - "includeKeys": [], - "transformAt": "processor", - "transformAtV1": "processor", - "supportedSourceTypes": [ - "android", - "ios", - "web", - "unity", - "amp", - 
"cloud", - "reactnative", - "cloudSource", - "flutter", - "warehouse", - "cordova" - ], - "saveDestinationResponse": true - }, - "responseRules": {}, - "options": null, - "uiConfig": null, - "id": "1xAu2vuR0scUwkBivf6VhqwWgcS", - "name": "S3_DATALAKE", - "displayName": "S3 Datalake", - "category": "warehouse", - "createdAt": "2021-08-24T13:00:10.427Z", - "updatedAt": "2022-07-20T17:06:00.870Z" - }, - "isConnectionEnabled": true, - "isProcessorEnabled": true - } - ], - "sourceDefinition": { - "options": null, - "id": "1dCzCUAtpWDzNxgGUYzq9sZdZZB", - "name": "HTTP", - "displayName": "HTTP", - "category": "", - "createdAt": "2020-06-12T06:35:35.962Z", - "updatedAt": "2020-06-12T06:35:35.962Z" - }, - "dgSourceTrackingPlanConfig": null - }, - { - "config": { - "isSampleSource": true, - "eventUpload": false, - "eventUploadTS": 1653346591988 - }, - "liveEventsConfig": { - "eventUpload": false, - "eventUploadTS": 1653346591988 - }, - "id": "{{.gcsSourceID}}", - "name": "gcs-datalake-integration", - "writeKey": "{{.gcsWriteKey}}", - "enabled": true, - "sourceDefinitionId": "1dCzCUAtpWDzNxgGUYzq9sZdZZB", - "createdBy": "279Kz3NhcXsUAx2KHnx1HKhdtbk", - "workspaceId": "{{.workspaceID}}", - "deleted": false, - "createdAt": "2022-03-31T11:45:57.339Z", - "updatedAt": "2022-05-23T22:56:32.000Z", - "destinations": [ - { - "config": { - "bucketName": "{{.gcsBucketName}}", - "prefix": "", - "endPoint": "{{.gcsEndPoint}}", - "syncFrequency": "30", - "disableSSL": true, - "jsonReads": true - }, - "liveEventsConfig": {}, - "secretConfig": {}, - "id": "{{.gcsDestinationID}}", - "name": "gcs-datalake-demo", - "enabled": true, - "workspaceId": "{{.workspaceID}}", - "deleted": false, - "createdAt": "2022-04-07T09:57:14.783Z", - "updatedAt": "2022-05-17T08:16:32.613Z", - "revisionId": "{{.gcsDestinationID}}", - "transformations": [], - "destinationDefinition": { - "config": { - "destConfig": { - "defaultConfig": [ - "bucketName", - "prefix", - "namespace", - "tableSuffix", - "timeWindowLayout", - "credentials", - "syncFrequency", - "syncStartAt" - ] - }, - "secretKeys": [], - "excludeKeys": [], - "includeKeys": [], - "transformAt": "processor", - "transformAtV1": "processor", - "supportedSourceTypes": [ - "android", - "ios", - "web", - "unity", - "amp", - "cloud", - "warehouse", - "reactnative", - "cloudSource", - "flutter", - "cordova" - ], - "saveDestinationResponse": true - }, - "responseRules": {}, - "options": null, - "uiConfig": null, - "id": "20lzWVRwzEimkq87sNQuz1or2GA", - "name": "GCS_DATALAKE", - "displayName": "Google Cloud Storage Datalake", - "category": "warehouse", - "createdAt": "2021-11-11T12:33:13.960Z", - "updatedAt": "2022-07-20T17:05:23.745Z" - }, - "isConnectionEnabled": true, - "isProcessorEnabled": true - } - ], - "sourceDefinition": { - "options": null, - "id": "1dCzCUAtpWDzNxgGUYzq9sZdZZB", - "name": "HTTP", - "displayName": "HTTP", - "category": "", - "createdAt": "2020-06-12T06:35:35.962Z", - "updatedAt": "2020-06-12T06:35:35.962Z" - }, - "dgSourceTrackingPlanConfig": null - } - ], - "libraries": [ - { - "versionId": "23Uxw7QEiOg8e0KkQV8LmNfWaWh" - } - ] -} diff --git a/warehouse/integrations/datalake/testdata/trino.staging.json b/warehouse/integrations/datalake/testdata/trino.staging.json new file mode 100644 index 00000000000..085cda2e0a2 --- /dev/null +++ b/warehouse/integrations/datalake/testdata/trino.staging.json @@ -0,0 +1,8 @@ +{"data": {"id": "920add93-60fe-4b72-b8a2-785633e24e73", "event": "product_track", "sent_at": "2023-05-12T04:29:04.211Z", "user_id": "{{.userID}}", 
"_timestamp": "2023-05-12T04:29:00.167Z", "context_ip": "[::1]", "event_text": "Product Track", "received_at": "2023-05-12T04:29:00.168Z", "context_source_id": "{{.sourceID}}", "context_request_ip": "[::1]", "original_timestamp": "2023-05-12T04:29:04.211Z", "context_source_type": "HTTP", "context_destination_id": "{{.destID}}", "context_destination_type": "S3_DATALAKE"}, "userId": "", "metadata": {"table": "tracks", "columns": {"id": "string", "event": "string", "sent_at": "datetime", "user_id": "string", "uuid_ts": "datetime", "_timestamp": "datetime", "context_ip": "string", "event_text": "string", "received_at": "datetime", "context_source_id": "string", "context_request_ip": "string", "original_timestamp": "datetime", "context_source_type": "string", "context_destination_id": "string", "context_destination_type": "string"}, "receivedAt": "2023-05-12T09:59:00.168+05:30"}} +{"data": {"id": "213f65b4-e283-41ae-9df5-bd84592aa6c6", "event": "product_track", "sent_at": "2023-05-12T04:29:04.211Z", "user_id": "{{.userID}}", "_timestamp": "2023-05-12T04:29:00.455Z", "context_ip": "[::1]", "event_text": "Product Track", "received_at": "2023-05-12T04:29:00.456Z", "context_source_id": "{{.sourceID}}", "context_request_ip": "[::1]", "original_timestamp": "2023-05-12T04:29:04.211Z", "context_source_type": "HTTP", "context_destination_id": "{{.destID}}", "context_destination_type": "S3_DATALAKE"}, "userId": "", "metadata": {"table": "tracks", "columns": {"id": "string", "event": "string", "sent_at": "datetime", "user_id": "string", "uuid_ts": "datetime", "_timestamp": "datetime", "context_ip": "string", "event_text": "string", "received_at": "datetime", "context_source_id": "string", "context_request_ip": "string", "original_timestamp": "datetime", "context_source_type": "string", "context_destination_id": "string", "context_destination_type": "string"}, "receivedAt": "2023-05-12T09:59:00.456+05:30"}} +{"data": {"id": "f56ddbcc-f8d8-49d8-b69f-d9236dd2397c", "event": "product_track", "sent_at": "2023-05-12T04:29:04.212Z", "user_id": "{{.userID}}", "_timestamp": "2023-05-12T04:29:00.822Z", "context_ip": "[::1]", "event_text": "Product Track", "received_at": "2023-05-12T04:29:00.823Z", "context_source_id": "{{.sourceID}}", "context_request_ip": "[::1]", "original_timestamp": "2023-05-12T04:29:04.212Z", "context_source_type": "HTTP", "context_destination_id": "{{.destID}}", "context_destination_type": "S3_DATALAKE"}, "userId": "", "metadata": {"table": "tracks", "columns": {"id": "string", "event": "string", "sent_at": "datetime", "user_id": "string", "uuid_ts": "datetime", "_timestamp": "datetime", "context_ip": "string", "event_text": "string", "received_at": "datetime", "context_source_id": "string", "context_request_ip": "string", "original_timestamp": "datetime", "context_source_type": "string", "context_destination_id": "string", "context_destination_type": "string"}, "receivedAt": "2023-05-12T09:59:00.823+05:30"}} +{"data": {"id": "3ba02184-51f1-4382-bd46-b1b499ff999d", "event": "product_track", "sent_at": "2023-05-12T04:29:04.213Z", "user_id": "{{.userID}}", "timestamp": "2023-05-12T04:29:01.250Z", "context_ip": "[::1]", "event_text": "Product Track", "received_at": "2023-05-12T04:29:01.251Z", "context_source_id": "{{.sourceID}}", "context_request_ip": "[::1]", "original_timestamp": "2023-05-12T04:29:04.213Z", "context_source_type": "HTTP", "context_destination_id": "{{.destID}}", "context_destination_type": "S3_DATALAKE"}, "userId": "", "metadata": {"table": "tracks", "columns": {"id": "string", 
"event": "string", "sent_at": "datetime", "user_id": "string", "uuid_ts": "datetime", "timestamp": "datetime", "context_ip": "string", "event_text": "string", "received_at": "datetime", "context_source_id": "string", "context_request_ip": "string", "original_timestamp": "datetime", "context_source_type": "string", "context_destination_id": "string", "context_destination_type": "string"}, "receivedAt": "2023-05-12T09:59:01.251+05:30"}} +{"data": {"id": "9b9c4cba-9f08-4283-8f43-e7f1a7fd3589", "event": "product_track", "sent_at": "2023-05-12T04:29:37.772Z", "user_id": "{{.userID}}", "_timestamp": "2023-05-12T04:29:36.953Z", "context_ip": "14.5.67.21", "event_text": "Product Track", "received_at": "2023-05-12T04:29:36.954Z", "context_passed_ip": "14.5.67.21", "context_source_id": "{{.sourceID}}", "context_request_ip": "[::1]", "original_timestamp": "2023-05-12T04:29:37.772Z", "context_source_type": "HTTP", "context_library_name": "http", "context_destination_id": "{{.destID}}", "context_destination_type": "S3_DATALAKE"}, "userId": "", "metadata": {"table": "tracks", "columns": {"id": "string", "event": "string", "sent_at": "datetime", "user_id": "string", "uuid_ts": "datetime", "_timestamp": "datetime", "context_ip": "string", "event_text": "string", "received_at": "datetime", "context_passed_ip": "string", "context_source_id": "string", "context_request_ip": "string", "original_timestamp": "datetime", "context_source_type": "string", "context_library_name": "string", "context_destination_id": "string", "context_destination_type": "string"}, "receivedAt": "2023-05-12T09:59:36.954+05:30"}} +{"data": {"id": "1b49518c-671c-43f3-800f-5e44c1aae772", "event": "product_track", "sent_at": "2023-05-12T04:29:37.773Z", "user_id": "{{.userID}}", "_timestamp": "2023-05-12T04:29:37.228Z", "context_ip": "14.5.67.21", "event_text": "Product Track", "received_at": "2023-05-12T04:29:37.229Z", "context_passed_ip": "14.5.67.21", "context_source_id": "{{.sourceID}}", "context_request_ip": "[::1]", "original_timestamp": "2023-05-12T04:29:37.773Z", "context_source_type": "HTTP", "context_library_name": "http", "context_destination_id": "{{.destID}}", "context_destination_type": "S3_DATALAKE"}, "userId": "", "metadata": {"table": "tracks", "columns": {"id": "string", "event": "string", "sent_at": "datetime", "user_id": "string", "uuid_ts": "datetime", "_timestamp": "datetime", "context_ip": "string", "event_text": "string", "received_at": "datetime", "context_passed_ip": "string", "context_source_id": "string", "context_request_ip": "string", "original_timestamp": "datetime", "context_source_type": "string", "context_library_name": "string", "context_destination_id": "string", "context_destination_type": "string"}, "receivedAt": "2023-05-12T09:59:37.229+05:30"}} +{"data": {"id": "6cc21bde-c601-4b78-9eaa-b26c3fe4e808", "event": "product_track", "sent_at": "2023-05-12T04:29:38.607Z", "user_id": "{{.userID}}", "_timestamp": "2023-05-12T04:29:37.671Z", "context_ip": "14.5.67.21", "event_text": "Product Track", "received_at": "2023-05-12T04:29:37.672Z", "context_passed_ip": "14.5.67.21", "context_source_id": "{{.sourceID}}", "context_request_ip": "[::1]", "original_timestamp": "2023-05-12T04:29:38.607Z", "context_source_type": "HTTP", "context_library_name": "http", "context_destination_id": "{{.destID}}", "context_destination_type": "S3_DATALAKE"}, "userId": "", "metadata": {"table": "tracks", "columns": {"id": "string", "event": "string", "sent_at": "datetime", "user_id": "string", "uuid_ts": "datetime", "_timestamp": 
"datetime", "context_ip": "string", "event_text": "string", "received_at": "datetime", "context_passed_ip": "string", "context_source_id": "string", "context_request_ip": "string", "original_timestamp": "datetime", "context_source_type": "string", "context_library_name": "string", "context_destination_id": "string", "context_destination_type": "string"}, "receivedAt": "2023-05-12T09:59:37.672+05:30"}} +{"data": {"id": "1a922433-2a6d-47a3-95bb-1b7415f45bc5", "event": "product_track", "sent_at": "2023-05-12T04:29:38.608Z", "user_id": "{{.userID}}", "timestamp": "2023-05-12T04:29:38.052Z", "context_ip": "[::1]", "event_text": "Product Track", "received_at": "2023-05-12T04:29:38.053Z", "context_source_id": "{{.sourceID}}", "context_request_ip": "[::1]", "original_timestamp": "2023-05-12T04:29:38.608Z", "context_source_type": "HTTP", "context_destination_id": "{{.destID}}", "context_destination_type": "S3_DATALAKE"}, "userId": "", "metadata": {"table": "tracks", "columns": {"id": "string", "event": "string", "sent_at": "datetime", "user_id": "string", "uuid_ts": "datetime", "timestamp": "datetime", "context_ip": "string", "event_text": "string", "received_at": "datetime", "context_source_id": "string", "context_request_ip": "string", "original_timestamp": "datetime", "context_source_type": "string", "context_destination_id": "string", "context_destination_type": "string"}, "receivedAt": "2023-05-12T09:59:38.053+05:30"}} diff --git a/warehouse/integrations/deltalake/deltalake_test.go b/warehouse/integrations/deltalake/deltalake_test.go index ce6810ab7c4..6eb98ed9e96 100644 --- a/warehouse/integrations/deltalake/deltalake_test.go +++ b/warehouse/integrations/deltalake/deltalake_test.go @@ -29,17 +29,15 @@ import ( kithelper "github.com/rudderlabs/rudder-go-kit/testhelper" backendconfig "github.com/rudderlabs/rudder-server/backend-config" - "github.com/rudderlabs/rudder-server/runner" th "github.com/rudderlabs/rudder-server/testhelper" - "github.com/rudderlabs/rudder-server/testhelper/health" - "github.com/rudderlabs/rudder-server/testhelper/workspaceConfig" + "github.com/rudderlabs/rudder-server/testhelper/backendconfigtest" "github.com/rudderlabs/rudder-server/utils/misc" warehouseclient "github.com/rudderlabs/rudder-server/warehouse/client" "github.com/rudderlabs/rudder-server/warehouse/integrations/deltalake" whth "github.com/rudderlabs/rudder-server/warehouse/integrations/testhelper" mockuploader "github.com/rudderlabs/rudder-server/warehouse/internal/mocks/utils" "github.com/rudderlabs/rudder-server/warehouse/internal/model" - warehouseutils "github.com/rudderlabs/rudder-server/warehouse/utils" + whutils "github.com/rudderlabs/rudder-server/warehouse/utils" "github.com/rudderlabs/rudder-server/warehouse/validations" ) @@ -74,174 +72,150 @@ func TestIntegration(t *testing.T) { t.Skip("Skipping tests. 
Add 'SLOW=1' env var to run test.") } if _, exists := os.LookupEnv(testKey); !exists { + if os.Getenv("FORCE_RUN_INTEGRATION_TESTS") == "true" { + t.Fatalf("%s environment variable not set", testKey) + } t.Skipf("Skipping %s as %s is not set", t.Name(), testKey) } - c := testcompose.New(t, compose.FilePaths([]string{"../testdata/docker-compose.jobsdb.yml"})) - c.Start(context.Background()) - misc.Init() validations.Init() - warehouseutils.Init() - - jobsDBPort := c.Port("jobsDb", 5432) - - httpPort, err := kithelper.GetFreePort() - require.NoError(t, err) - - workspaceID := warehouseutils.RandHex() - sourceID := warehouseutils.RandHex() - destinationID := warehouseutils.RandHex() - writeKey := warehouseutils.RandHex() - destType := warehouseutils.DELTALAKE - namespace := whth.RandSchema(destType) + whutils.Init() - deltaLakeCredentials, err := deltaLakeTestCredentials() - require.NoError(t, err) + destType := whutils.DELTALAKE - port, err := strconv.Atoi(deltaLakeCredentials.Port) + credentials, err := deltaLakeTestCredentials() require.NoError(t, err) - connector, err := dbsql.NewConnector( - dbsql.WithServerHostname(deltaLakeCredentials.Host), - dbsql.WithPort(port), - dbsql.WithHTTPPath(deltaLakeCredentials.Path), - dbsql.WithAccessToken(deltaLakeCredentials.Token), - dbsql.WithSessionParams(map[string]string{ - "ansi_mode": "false", - }), - ) - require.NoError(t, err) + t.Run("Event flow", func(t *testing.T) { + httpPort, err := kithelper.GetFreePort() + require.NoError(t, err) - db := sql.OpenDB(connector) - require.NoError(t, db.Ping()) + c := testcompose.New(t, compose.FilePaths([]string{"../testdata/docker-compose.jobsdb.yml"})) + c.Start(context.Background()) - bootstrapSvc := func(t *testing.T, preferAppend *bool) { - var preferAppendStr string - if preferAppend != nil { - preferAppendStr = fmt.Sprintf(`"preferAppend": %v,`, *preferAppend) - } - templateConfigurations := map[string]any{ - "workspaceID": workspaceID, - "sourceID": sourceID, - "destinationID": destinationID, - "writeKey": writeKey, - "host": deltaLakeCredentials.Host, - "port": deltaLakeCredentials.Port, - "path": deltaLakeCredentials.Path, - "token": deltaLakeCredentials.Token, - "namespace": namespace, - "containerName": deltaLakeCredentials.ContainerName, - "accountName": deltaLakeCredentials.AccountName, - "accountKey": deltaLakeCredentials.AccountKey, - "preferAppend": preferAppendStr, - } - workspaceConfigPath := workspaceConfig.CreateTempFile(t, "testdata/template.json", templateConfigurations) - - whth.EnhanceWithDefaultEnvs(t) - t.Setenv("JOBS_DB_PORT", strconv.Itoa(jobsDBPort)) - t.Setenv("WAREHOUSE_JOBS_DB_PORT", strconv.Itoa(jobsDBPort)) - t.Setenv("RSERVER_WAREHOUSE_DELTALAKE_MAX_PARALLEL_LOADS", "8") - t.Setenv("RSERVER_WAREHOUSE_WEB_PORT", strconv.Itoa(httpPort)) - t.Setenv("RSERVER_BACKEND_CONFIG_CONFIG_JSONPATH", workspaceConfigPath) - t.Setenv("RSERVER_WAREHOUSE_DELTALAKE_SLOW_QUERY_THRESHOLD", "0s") - - svcDone := make(chan struct{}) - ctx, cancel := context.WithCancel(context.Background()) - - go func() { - r := runner.New(runner.ReleaseInfo{}) - _ = r.Run(ctx, []string{"deltalake-integration-test"}) - close(svcDone) - }() - - t.Cleanup(func() { <-svcDone }) - t.Cleanup(cancel) - - serviceHealthEndpoint := fmt.Sprintf("http://localhost:%d/health", httpPort) - health.WaitUntilReady(ctx, t, - serviceHealthEndpoint, time.Minute, time.Second, "serviceHealthEndpoint", - ) - } + workspaceID := whutils.RandHex() + jobsDBPort := c.Port("jobsDb", 5432) - t.Run("Event flow", func(t *testing.T) { jobsDB := 
whth.JobsDB(t, jobsDBPort) - t.Cleanup(func() { - dropSchema(t, db, namespace) - }) - testCases := []struct { name string - writeKey string - schema string - sourceID string - destinationID string messageID string warehouseEventsMap whth.EventsCountMap - preferAppend *bool useParquetLoadFiles bool stagingFilePrefix string jobRunID string + configOverride map[string]any }{ { name: "Merge Mode", - writeKey: writeKey, - schema: namespace, - sourceID: sourceID, - destinationID: destinationID, warehouseEventsMap: mergeEventsMap(), - preferAppend: th.Ptr(false), useParquetLoadFiles: false, stagingFilePrefix: "testdata/upload-job-merge-mode", jobRunID: misc.FastUUID().String(), + configOverride: map[string]any{ + "preferAppend": false, + }, }, { name: "Append Mode", - writeKey: writeKey, - schema: namespace, - sourceID: sourceID, - destinationID: destinationID, warehouseEventsMap: appendEventsMap(), - preferAppend: th.Ptr(true), useParquetLoadFiles: false, stagingFilePrefix: "testdata/upload-job-append-mode", // an empty jobRunID means that the source is not an ETL one // see Uploader.CanAppend() jobRunID: "", + configOverride: map[string]any{ + "preferAppend": true, + }, }, { name: "Undefined preferAppend", - writeKey: writeKey, - schema: namespace, - sourceID: sourceID, - destinationID: destinationID, warehouseEventsMap: mergeEventsMap(), - preferAppend: nil, // not defined in backend config useParquetLoadFiles: false, stagingFilePrefix: "testdata/upload-job-undefined-preferAppend-mode", jobRunID: misc.FastUUID().String(), }, { name: "Parquet load files", - writeKey: writeKey, - schema: namespace, - sourceID: sourceID, - destinationID: destinationID, warehouseEventsMap: mergeEventsMap(), - preferAppend: th.Ptr(false), useParquetLoadFiles: true, stagingFilePrefix: "testdata/upload-job-parquet", jobRunID: misc.FastUUID().String(), + configOverride: map[string]any{ + "preferAppend": false, + }, }, } for _, tc := range testCases { - tc := tc t.Run(tc.name, func(t *testing.T) { - bootstrapSvc(t, tc.preferAppend) + var ( + sourceID = whutils.RandHex() + destinationID = whutils.RandHex() + writeKey = whutils.RandHex() + namespace = whth.RandSchema(destType) + ) + + destinationBuilder := backendconfigtest.NewDestinationBuilder(destType). + WithID(destinationID). + WithRevisionID(destinationID). + WithConfigOption("host", credentials.Host). + WithConfigOption("port", credentials.Port). + WithConfigOption("path", credentials.Path). + WithConfigOption("token", credentials.Token). + WithConfigOption("namespace", namespace). + WithConfigOption("bucketProvider", "AZURE_BLOB"). + WithConfigOption("containerName", credentials.ContainerName). + WithConfigOption("useSTSTokens", false). + WithConfigOption("enableSSE", false). + WithConfigOption("accountName", credentials.AccountName). + WithConfigOption("accountKey", credentials.AccountKey). + WithConfigOption("syncFrequency", "30") + for k, v := range tc.configOverride { + destinationBuilder = destinationBuilder.WithConfigOption(k, v) + } + + workspaceConfig := backendconfigtest.NewConfigBuilder(). + WithSource( + backendconfigtest.NewSourceBuilder(). + WithID(sourceID). + WithWriteKey(writeKey). + WithWorkspaceID(workspaceID). + WithConnection(destinationBuilder.Build()). + Build(), + ). + WithWorkspaceID(workspaceID). 
+ Build() + + t.Setenv("RSERVER_WAREHOUSE_DELTALAKE_MAX_PARALLEL_LOADS", "8") + t.Setenv("RSERVER_WAREHOUSE_DELTALAKE_SLOW_QUERY_THRESHOLD", "0s") t.Setenv("RSERVER_WAREHOUSE_DELTALAKE_USE_PARQUET_LOAD_FILES", strconv.FormatBool(tc.useParquetLoadFiles)) + whth.BootstrapSvc(t, workspaceConfig, httpPort, jobsDBPort) + + port, err := strconv.Atoi(credentials.Port) + require.NoError(t, err) + + connector, err := dbsql.NewConnector( + dbsql.WithServerHostname(credentials.Host), + dbsql.WithPort(port), + dbsql.WithHTTPPath(credentials.Path), + dbsql.WithAccessToken(credentials.Token), + dbsql.WithSessionParams(map[string]string{ + "ansi_mode": "false", + }), + ) + require.NoError(t, err) + + db := sql.OpenDB(connector) + require.NoError(t, db.Ping()) + t.Cleanup(func() { _ = db.Close() }) + t.Cleanup(func() { + dropSchema(t, db, namespace) + }) + sqlClient := &warehouseclient.Client{ SQL: db, Type: warehouseclient.SQLClient, @@ -249,22 +223,22 @@ func TestIntegration(t *testing.T) { conf := map[string]interface{}{ "bucketProvider": "AZURE_BLOB", - "containerName": deltaLakeCredentials.ContainerName, + "containerName": credentials.ContainerName, "prefix": "", "useSTSTokens": false, "enableSSE": false, - "accountName": deltaLakeCredentials.AccountName, - "accountKey": deltaLakeCredentials.AccountKey, + "accountName": credentials.AccountName, + "accountKey": credentials.AccountKey, } tables := []string{"identifies", "users", "tracks", "product_track", "pages", "screens", "aliases", "groups"} t.Log("verifying test case 1") ts1 := whth.TestConfig{ WriteKey: writeKey, - Schema: tc.schema, + Schema: namespace, Tables: tables, - SourceID: tc.sourceID, - DestinationID: tc.destinationID, + SourceID: sourceID, + DestinationID: destinationID, JobRunID: tc.jobRunID, WarehouseEventsMap: whth.EventsCountMap{ "identifies": 1, @@ -290,10 +264,10 @@ func TestIntegration(t *testing.T) { t.Log("verifying test case 2") ts2 := whth.TestConfig{ WriteKey: writeKey, - Schema: tc.schema, + Schema: namespace, Tables: tables, - SourceID: tc.sourceID, - DestinationID: tc.destinationID, + SourceID: sourceID, + DestinationID: destinationID, JobRunID: tc.jobRunID, WarehouseEventsMap: tc.warehouseEventsMap, Config: conf, @@ -311,25 +285,44 @@ func TestIntegration(t *testing.T) { }) t.Run("Validation", func(t *testing.T) { + namespace := whth.RandSchema(destType) + + port, err := strconv.Atoi(credentials.Port) + require.NoError(t, err) + + connector, err := dbsql.NewConnector( + dbsql.WithServerHostname(credentials.Host), + dbsql.WithPort(port), + dbsql.WithHTTPPath(credentials.Path), + dbsql.WithAccessToken(credentials.Token), + dbsql.WithSessionParams(map[string]string{ + "ansi_mode": "false", + }), + ) + require.NoError(t, err) + + db := sql.OpenDB(connector) + require.NoError(t, db.Ping()) + t.Cleanup(func() { _ = db.Close() }) t.Cleanup(func() { dropSchema(t, db, namespace) }) dest := backendconfig.DestinationT{ - ID: destinationID, + ID: "test_destination_id", Config: map[string]interface{}{ - "host": deltaLakeCredentials.Host, - "port": deltaLakeCredentials.Port, - "path": deltaLakeCredentials.Path, - "token": deltaLakeCredentials.Token, + "host": credentials.Host, + "port": credentials.Port, + "path": credentials.Path, + "token": credentials.Token, "namespace": namespace, "bucketProvider": "AZURE_BLOB", - "containerName": deltaLakeCredentials.ContainerName, + "containerName": credentials.ContainerName, "prefix": "", "useSTSTokens": false, "enableSSE": false, - "accountName": deltaLakeCredentials.AccountName, - 
"accountKey": deltaLakeCredentials.AccountKey, + "accountName": credentials.AccountName, + "accountKey": credentials.AccountKey, "syncFrequency": "30", "eventDelivery": false, "eventDeliveryTS": 1648195480174, @@ -368,7 +361,6 @@ func TestIntegration(t *testing.T) { } for _, tc := range testCases { - tc := tc t.Run(tc.name, func(t *testing.T) { t.Setenv( "RSERVER_WAREHOUSE_DELTALAKE_USE_PARQUET_LOAD_FILES", @@ -385,12 +377,6 @@ func TestIntegration(t *testing.T) { }) t.Run("Load Table", func(t *testing.T) { - const ( - sourceID = "test_source_id" - destinationID = "test_destination_id" - workspaceID = "test_workspace_id" - ) - ctx := context.Background() namespace := whth.RandSchema(destType) @@ -420,36 +406,36 @@ func TestIntegration(t *testing.T) { warehouse := model.Warehouse{ Source: backendconfig.SourceT{ - ID: sourceID, + ID: "test_source_id", }, Destination: backendconfig.DestinationT{ - ID: destinationID, + ID: "test_destination_id", DestinationDefinition: backendconfig.DestinationDefinitionT{ Name: destType, }, Config: map[string]any{ - "host": deltaLakeCredentials.Host, - "port": deltaLakeCredentials.Port, - "path": deltaLakeCredentials.Path, - "token": deltaLakeCredentials.Token, + "host": credentials.Host, + "port": credentials.Port, + "path": credentials.Path, + "token": credentials.Token, "namespace": namespace, - "bucketProvider": warehouseutils.AzureBlob, - "containerName": deltaLakeCredentials.ContainerName, - "accountName": deltaLakeCredentials.AccountName, - "accountKey": deltaLakeCredentials.AccountKey, + "bucketProvider": whutils.AzureBlob, + "containerName": credentials.ContainerName, + "accountName": credentials.AccountName, + "accountKey": credentials.AccountKey, }, }, - WorkspaceID: workspaceID, + WorkspaceID: "test_workspace_id", Namespace: namespace, } fm, err := filemanager.New(&filemanager.Settings{ - Provider: warehouseutils.AzureBlob, + Provider: whutils.AzureBlob, Config: map[string]any{ - "containerName": deltaLakeCredentials.ContainerName, - "accountName": deltaLakeCredentials.AccountName, - "accountKey": deltaLakeCredentials.AccountKey, - "bucketProvider": warehouseutils.AzureBlob, + "containerName": credentials.ContainerName, + "accountName": credentials.AccountName, + "accountKey": credentials.AccountKey, + "bucketProvider": whutils.AzureBlob, }, }) require.NoError(t, err) @@ -459,8 +445,8 @@ func TestIntegration(t *testing.T) { uploadOutput := whth.UploadLoadFile(t, fm, "../testdata/load.csv.gz", tableName) - loadFiles := []warehouseutils.LoadFile{{Location: uploadOutput.Location}} - mockUploader := newMockUploader(t, loadFiles, tableName, schemaInUpload, schemaInWarehouse, warehouseutils.LoadFileTypeCsv, false, false, "2022-12-15T06:53:49.640Z") + loadFiles := []whutils.LoadFile{{Location: uploadOutput.Location}} + mockUploader := newMockUploader(t, loadFiles, tableName, schemaInUpload, schemaInWarehouse, whutils.LoadFileTypeCsv, false, false, "2022-12-15T06:53:49.640Z") d := deltalake.New(config.New(), logger.NOP, stats.NOP) err := d.Setup(ctx, warehouse, mockUploader) @@ -475,8 +461,8 @@ func TestIntegration(t *testing.T) { uploadOutput := whth.UploadLoadFile(t, fm, "../testdata/load.csv.gz", tableName) - loadFiles := []warehouseutils.LoadFile{{Location: uploadOutput.Location}} - mockUploader := newMockUploader(t, loadFiles, tableName, schemaInUpload, schemaInWarehouse, warehouseutils.LoadFileTypeCsv, false, false, "2022-12-15T06:53:49.640Z") + loadFiles := []whutils.LoadFile{{Location: uploadOutput.Location}} + mockUploader := newMockUploader(t, 
loadFiles, tableName, schemaInUpload, schemaInWarehouse, whutils.LoadFileTypeCsv, false, false, "2022-12-15T06:53:49.640Z") d := deltalake.New(config.New(), logger.NOP, stats.NOP) err := d.Setup(ctx, warehouse, mockUploader) @@ -497,10 +483,10 @@ func TestIntegration(t *testing.T) { tableName := "merge_without_dedup_test_table" uploadOutput := whth.UploadLoadFile(t, fm, "../testdata/load.csv.gz", tableName) - loadFiles := []warehouseutils.LoadFile{{Location: uploadOutput.Location}} + loadFiles := []whutils.LoadFile{{Location: uploadOutput.Location}} mockUploader := newMockUploader( t, loadFiles, tableName, schemaInUpload, schemaInWarehouse, - warehouseutils.LoadFileTypeCsv, false, false, "2022-12-15T06:53:49.640Z", + whutils.LoadFileTypeCsv, false, false, "2022-12-15T06:53:49.640Z", ) d := deltalake.New(config.New(), logger.NOP, stats.NOP) @@ -548,8 +534,8 @@ func TestIntegration(t *testing.T) { tableName := "merge_with_dedup_use_new_record_test_table" uploadOutput := whth.UploadLoadFile(t, fm, "../testdata/dedup.csv.gz", tableName) - loadFiles := []warehouseutils.LoadFile{{Location: uploadOutput.Location}} - mockUploader := newMockUploader(t, loadFiles, tableName, schemaInUpload, schemaInWarehouse, warehouseutils.LoadFileTypeCsv, true, true, "2022-12-15T06:53:49.640Z") + loadFiles := []whutils.LoadFile{{Location: uploadOutput.Location}} + mockUploader := newMockUploader(t, loadFiles, tableName, schemaInUpload, schemaInWarehouse, whutils.LoadFileTypeCsv, true, true, "2022-12-15T06:53:49.640Z") d := deltalake.New(config.New(), logger.NOP, stats.NOP) err := d.Setup(ctx, warehouse, mockUploader) @@ -598,8 +584,8 @@ func TestIntegration(t *testing.T) { tableName := "merge_with_no_overlapping_partition_test_table" uploadOutput := whth.UploadLoadFile(t, fm, "../testdata/dedup.csv.gz", tableName) - loadFiles := []warehouseutils.LoadFile{{Location: uploadOutput.Location}} - mockUploader := newMockUploader(t, loadFiles, tableName, schemaInUpload, schemaInWarehouse, warehouseutils.LoadFileTypeCsv, true, false, "2022-11-15T06:53:49.640Z") + loadFiles := []whutils.LoadFile{{Location: uploadOutput.Location}} + mockUploader := newMockUploader(t, loadFiles, tableName, schemaInUpload, schemaInWarehouse, whutils.LoadFileTypeCsv, true, false, "2022-11-15T06:53:49.640Z") appendWarehouse := th.Clone(t, warehouse) appendWarehouse.Destination.Config["preferAppend"] = false @@ -649,8 +635,8 @@ func TestIntegration(t *testing.T) { tableName := "merge_with_no_overlapping_partition_test_table" uploadOutput := whth.UploadLoadFile(t, fm, "../testdata/dedup.csv.gz", tableName) - loadFiles := []warehouseutils.LoadFile{{Location: uploadOutput.Location}} - mockUploader := newMockUploader(t, loadFiles, tableName, schemaInUpload, schemaInWarehouse, warehouseutils.LoadFileTypeCsv, true, false, "2022-11-15T06:53:49.640Z") + loadFiles := []whutils.LoadFile{{Location: uploadOutput.Location}} + mockUploader := newMockUploader(t, loadFiles, tableName, schemaInUpload, schemaInWarehouse, whutils.LoadFileTypeCsv, true, false, "2022-11-15T06:53:49.640Z") appendWarehouse := th.Clone(t, warehouse) appendWarehouse.Destination.Config["preferAppend"] = true @@ -702,8 +688,8 @@ func TestIntegration(t *testing.T) { uploadOutput := whth.UploadLoadFile(t, fm, "../testdata/load.csv.gz", tableName) - loadFiles := []warehouseutils.LoadFile{{Location: uploadOutput.Location}} - mockUploader := newMockUploader(t, loadFiles, tableName, schemaInUpload, schemaInWarehouse, warehouseutils.LoadFileTypeCsv, true, false, "2022-12-15T06:53:49.640Z") + 
loadFiles := []whutils.LoadFile{{Location: uploadOutput.Location}} + mockUploader := newMockUploader(t, loadFiles, tableName, schemaInUpload, schemaInWarehouse, whutils.LoadFileTypeCsv, true, false, "2022-12-15T06:53:49.640Z") appendWarehouse := th.Clone(t, warehouse) appendWarehouse.Destination.Config[model.PreferAppendSetting.String()] = true @@ -752,13 +738,13 @@ func TestIntegration(t *testing.T) { t.Run("load file does not exists", func(t *testing.T) { tableName := "load_file_not_exists_test_table" - loadFiles := []warehouseutils.LoadFile{{ + loadFiles := []whutils.LoadFile{{ Location: fmt.Sprintf("https://%s.blob.core.windows.net/%s/rudder-warehouse-load-objects/load_file_not_exists_test_table/test_source_id/a01af26e-4548-49ff-a895-258829cc1a83-load_file_not_exists_test_table/load.csv.gz", - deltaLakeCredentials.AccountName, - deltaLakeCredentials.ContainerName, + credentials.AccountName, + credentials.ContainerName, ), }} - mockUploader := newMockUploader(t, loadFiles, tableName, schemaInUpload, schemaInWarehouse, warehouseutils.LoadFileTypeCsv, false, false, "2022-12-15T06:53:49.640Z") + mockUploader := newMockUploader(t, loadFiles, tableName, schemaInUpload, schemaInWarehouse, whutils.LoadFileTypeCsv, false, false, "2022-12-15T06:53:49.640Z") d := deltalake.New(config.New(), logger.NOP, stats.NOP) err := d.Setup(ctx, warehouse, mockUploader) @@ -782,8 +768,8 @@ func TestIntegration(t *testing.T) { uploadOutput := whth.UploadLoadFile(t, fm, "../testdata/mismatch-columns.csv.gz", tableName) - loadFiles := []warehouseutils.LoadFile{{Location: uploadOutput.Location}} - mockUploader := newMockUploader(t, loadFiles, tableName, schemaInUpload, schemaInWarehouse, warehouseutils.LoadFileTypeCsv, false, false, "2022-12-15T06:53:49.640Z") + loadFiles := []whutils.LoadFile{{Location: uploadOutput.Location}} + mockUploader := newMockUploader(t, loadFiles, tableName, schemaInUpload, schemaInWarehouse, whutils.LoadFileTypeCsv, false, false, "2022-12-15T06:53:49.640Z") d := deltalake.New(config.New(), logger.NOP, stats.NOP) err := d.Setup(ctx, warehouse, mockUploader) @@ -826,8 +812,8 @@ func TestIntegration(t *testing.T) { uploadOutput := whth.UploadLoadFile(t, fm, "../testdata/mismatch-schema.csv.gz", tableName) - loadFiles := []warehouseutils.LoadFile{{Location: uploadOutput.Location}} - mockUploader := newMockUploader(t, loadFiles, tableName, schemaInUpload, schemaInWarehouse, warehouseutils.LoadFileTypeCsv, false, false, "2022-12-15T06:53:49.640Z") + loadFiles := []whutils.LoadFile{{Location: uploadOutput.Location}} + mockUploader := newMockUploader(t, loadFiles, tableName, schemaInUpload, schemaInWarehouse, whutils.LoadFileTypeCsv, false, false, "2022-12-15T06:53:49.640Z") d := deltalake.New(config.New(), logger.NOP, stats.NOP) err := d.Setup(ctx, warehouse, mockUploader) @@ -866,12 +852,12 @@ func TestIntegration(t *testing.T) { require.Equal(t, records, whth.MismatchSchemaTestRecords()) }) t.Run("discards", func(t *testing.T) { - tableName := warehouseutils.DiscardsTable + tableName := whutils.DiscardsTable uploadOutput := whth.UploadLoadFile(t, fm, "../testdata/discards.csv.gz", tableName) - loadFiles := []warehouseutils.LoadFile{{Location: uploadOutput.Location}} - mockUploader := newMockUploader(t, loadFiles, tableName, warehouseutils.DiscardsSchema, warehouseutils.DiscardsSchema, warehouseutils.LoadFileTypeCsv, false, false, "2022-12-15T06:53:49.640Z") + loadFiles := []whutils.LoadFile{{Location: uploadOutput.Location}} + mockUploader := newMockUploader(t, loadFiles, tableName, 
whutils.DiscardsSchema, whutils.DiscardsSchema, whutils.LoadFileTypeCsv, false, false, "2022-12-15T06:53:49.640Z") d := deltalake.New(config.New(), logger.NOP, stats.NOP) err := d.Setup(ctx, warehouse, mockUploader) @@ -883,7 +869,7 @@ func TestIntegration(t *testing.T) { dropSchema(t, d.DB.DB, namespace) }) - err = d.CreateTable(ctx, tableName, warehouseutils.DiscardsSchema) + err = d.CreateTable(ctx, tableName, whutils.DiscardsSchema) require.NoError(t, err) loadTableStat, err := d.LoadTable(ctx, tableName) @@ -913,8 +899,8 @@ func TestIntegration(t *testing.T) { uploadOutput := whth.UploadLoadFile(t, fm, "../testdata/load.parquet", tableName) - loadFiles := []warehouseutils.LoadFile{{Location: uploadOutput.Location}} - mockUploader := newMockUploader(t, loadFiles, tableName, schemaInUpload, schemaInWarehouse, warehouseutils.LoadFileTypeParquet, false, false, "2022-12-15T06:53:49.640Z") + loadFiles := []whutils.LoadFile{{Location: uploadOutput.Location}} + mockUploader := newMockUploader(t, loadFiles, tableName, schemaInUpload, schemaInWarehouse, whutils.LoadFileTypeParquet, false, false, "2022-12-15T06:53:49.640Z") d := deltalake.New(config.New(), logger.NOP, stats.NOP) err := d.Setup(ctx, warehouse, mockUploader) @@ -958,8 +944,8 @@ func TestIntegration(t *testing.T) { uploadOutput := whth.UploadLoadFile(t, fm, "../testdata/load.csv.gz", tableName) - loadFiles := []warehouseutils.LoadFile{{Location: uploadOutput.Location}} - mockUploader := newMockUploader(t, loadFiles, tableName, schemaInUpload, schemaInWarehouse, warehouseutils.LoadFileTypeCsv, false, false, "2022-12-15T06:53:49.640Z") + loadFiles := []whutils.LoadFile{{Location: uploadOutput.Location}} + mockUploader := newMockUploader(t, loadFiles, tableName, schemaInUpload, schemaInWarehouse, whutils.LoadFileTypeCsv, false, false, "2022-12-15T06:53:49.640Z") d := deltalake.New(config.New(), logger.NOP, stats.NOP) err := d.Setup(ctx, warehouse, mockUploader) @@ -1019,8 +1005,8 @@ func TestIntegration(t *testing.T) { uploadOutput := whth.UploadLoadFile(t, fm, "../testdata/load.csv.gz", tableName) - loadFiles := []warehouseutils.LoadFile{{Location: uploadOutput.Location}} - mockUploader := newMockUploader(t, loadFiles, tableName, schemaInUpload, schemaInWarehouse, warehouseutils.LoadFileTypeCsv, false, false, "2022-12-15T06:53:49.640Z") + loadFiles := []whutils.LoadFile{{Location: uploadOutput.Location}} + mockUploader := newMockUploader(t, loadFiles, tableName, schemaInUpload, schemaInWarehouse, whutils.LoadFileTypeCsv, false, false, "2022-12-15T06:53:49.640Z") d := deltalake.New(config.New(), logger.NOP, stats.NOP) err := d.Setup(ctx, warehouse, mockUploader) @@ -1079,37 +1065,31 @@ func TestIntegration(t *testing.T) { }) t.Run("Fetch Schema", func(t *testing.T) { - const ( - sourceID = "test_source_id" - destinationID = "test_destination_id" - workspaceID = "test_workspace_id" - ) - ctx := context.Background() namespace := whth.RandSchema(destType) warehouse := model.Warehouse{ Source: backendconfig.SourceT{ - ID: sourceID, + ID: "test_source_id", }, Destination: backendconfig.DestinationT{ - ID: destinationID, + ID: "test_destination_id", DestinationDefinition: backendconfig.DestinationDefinitionT{ Name: destType, }, Config: map[string]any{ - "host": deltaLakeCredentials.Host, - "port": deltaLakeCredentials.Port, - "path": deltaLakeCredentials.Path, - "token": deltaLakeCredentials.Token, + "host": credentials.Host, + "port": credentials.Port, + "path": credentials.Path, + "token": credentials.Token, "namespace": namespace, - 
"bucketProvider": warehouseutils.AzureBlob, - "containerName": deltaLakeCredentials.ContainerName, - "accountName": deltaLakeCredentials.AccountName, - "accountKey": deltaLakeCredentials.AccountKey, + "bucketProvider": whutils.AzureBlob, + "containerName": credentials.ContainerName, + "accountName": credentials.AccountName, + "accountKey": credentials.AccountKey, }, }, - WorkspaceID: workspaceID, + WorkspaceID: "test_workspace_id", Namespace: namespace, } @@ -1202,7 +1182,7 @@ func TestIntegration(t *testing.T) { missingDatatypeStats := []string{"void", "timestamp_ntz", "struct", "array", "binary", "map", "decimal(10,2)"} for _, missingDatatype := range missingDatatypeStats { - require.EqualValues(t, 1, statsStore.Get(warehouseutils.RudderMissingDatatype, stats.Tags{ + require.EqualValues(t, 1, statsStore.Get(whutils.RudderMissingDatatype, stats.Tags{ "module": "warehouse", "destType": warehouse.Type, "workspaceId": warehouse.WorkspaceID, @@ -1217,10 +1197,11 @@ func TestIntegration(t *testing.T) { func dropSchema(t *testing.T, db *sql.DB, namespace string) { t.Helper() + t.Log("dropping schema", namespace) require.Eventually(t, func() bool { - _, err := db.Exec(fmt.Sprintf(`DROP SCHEMA %s CASCADE;`, namespace)) + _, err := db.ExecContext(context.Background(), fmt.Sprintf(`DROP SCHEMA %s CASCADE;`, namespace)) if err != nil { t.Logf("error deleting schema %q: %v", namespace, err) return false @@ -1258,8 +1239,6 @@ func TestDeltalake_TrimErrorMessage(t *testing.T) { } for _, tc := range testCases { - tc := tc - t.Run(tc.name, func(t *testing.T) { c := config.New() c.Set("Warehouse.deltalake.maxErrorLength", len(tempError.Error())*25) @@ -1331,7 +1310,7 @@ func TestDeltalake_ShouldMerge(t *testing.T) { func newMockUploader( t testing.TB, - loadFiles []warehouseutils.LoadFile, + loadFiles []whutils.LoadFile, tableName string, schemaInUpload model.TableSchema, schemaInWarehouse model.TableSchema, @@ -1339,7 +1318,7 @@ func newMockUploader( canAppend bool, onDedupUseNewRecords bool, eventTS string, -) warehouseutils.Uploader { +) whutils.Uploader { ctrl := gomock.NewController(t) t.Cleanup(ctrl.Finish) @@ -1351,7 +1330,7 @@ func newMockUploader( mockUploader.EXPECT().ShouldOnDedupUseNewRecord().Return(onDedupUseNewRecords).AnyTimes() mockUploader.EXPECT().CanAppend().Return(canAppend).AnyTimes() mockUploader.EXPECT().GetLoadFilesMetadata(gomock.Any(), gomock.Any()).DoAndReturn( - func(ctx context.Context, options warehouseutils.GetLoadFilesOptions) ([]warehouseutils.LoadFile, error) { + func(ctx context.Context, options whutils.GetLoadFilesOptions) ([]whutils.LoadFile, error) { return slices.Clone(loadFiles), nil }, ).AnyTimes() diff --git a/warehouse/integrations/deltalake/testdata/template.json b/warehouse/integrations/deltalake/testdata/template.json deleted file mode 100644 index e9ed673e063..00000000000 --- a/warehouse/integrations/deltalake/testdata/template.json +++ /dev/null @@ -1,142 +0,0 @@ -{ - "enableMetrics": false, - "workspaceId": "{{.workspaceID}}", - "sources": [ - { - "config": { - "eventUpload": false, - "eventUploadTS": 1648195575635 - }, - "liveEventsConfig": { - "eventUpload": false, - "eventUploadTS": 1648195575635 - }, - "id": "{{.sourceID}}", - "name": "deltalake-integration", - "writeKey": "{{.writeKey}}", - "enabled": true, - "sourceDefinitionId": "1TW48i2bIzEl1HPf825cEznfIM8", - "createdBy": "1wDN9v6bW9ymBaR7b7r0EjV3leC", - "workspaceId": "{{.workspaceID}}", - "deleted": false, - "createdAt": "2022-02-18T07:57:42.736Z", - "updatedAt": "2022-03-25T08:06:15.642Z", - 
"destinations": [ - { - "config": { - {{.preferAppend}} - "host": "{{.host}}", - "port": "{{.port}}", - "path": "{{.path}}", - "token": "{{.token}}", - "namespace": "{{.namespace}}", - "bucketProvider": "AZURE_BLOB", - "containerName": "{{.containerName}}", - "prefix": "", - "useSTSTokens": false, - "enableSSE": false, - "accountName": "{{.accountName}}", - "accountKey": "{{.accountKey}}", - "syncFrequency": "30", - "eventDelivery": false, - "eventDeliveryTS": 1648195480174 - }, - "liveEventsConfig": { - "eventDelivery": false, - "eventDeliveryTS": 1648195480174 - }, - "secretConfig": {}, - "id": "{{.destinationID}}", - "name": "deltalake-demo", - "enabled": true, - "workspaceId": "{{.workspaceID}}", - "deleted": false, - "createdAt": "2022-02-18T17:37:22.146Z", - "updatedAt": "2022-05-25T07:38:38.033Z", - "revisionId": "{{.destinationID}}", - "transformations": [], - "destinationDefinition": { - "config": { - "destConfig": { - "defaultConfig": [ - "host", - "port", - "path", - "token", - "namespace", - "bucketProvider", - "bucketName", - "containerName", - "prefix", - "useSTSTokens", - "accessKeyID", - "accessKey", - "enableSSE", - "accountName", - "accountKey", - "credentials", - "syncFrequency", - "syncStartAt", - "excludeWindow", - "enableExternalLocation", - "externalLocation" - ] - }, - "secretKeys": [ - "accessKeyID", - "accessKey", - "accountName", - "accountKey", - "credentials", - "token" - ], - "excludeKeys": [], - "includeKeys": [], - "transformAt": "processor", - "transformAtV1": "processor", - "supportedSourceTypes": [ - "android", - "ios", - "web", - "unity", - "amp", - "cloud", - "warehouse", - "reactnative", - "cloudSource", - "flutter", - "cordova" - ], - "saveDestinationResponse": true - }, - "responseRules": {}, - "options": null, - "id": "23HLpnDJnIg7DsBvDWGU6DQzFEo", - "name": "DELTALAKE", - "displayName": "Databricks (Delta Lake)", - "category": "warehouse", - "createdAt": "2022-01-05T12:37:30.136Z", - "updatedAt": "2022-05-25T15:07:06.596Z" - }, - "isConnectionEnabled": true, - "isProcessorEnabled": true - } - ], - "sourceDefinition": { - "options": null, - "id": "1TW48i2bIzEl1HPf825cEznfIM8", - "name": "Javascript", - "displayName": "Javascript", - "category": null, - "createdAt": "2019-11-12T12:39:19.885Z", - "updatedAt": "2020-06-18T11:54:06.114Z" - }, - "dgSourceTrackingPlanConfig": null - } - ], - "libraries": [ - { - "versionId": "23Uxw7QEiOg8e0KkQV8LmNfWaWh" - } - ] -} diff --git a/warehouse/integrations/mssql/mssql_test.go b/warehouse/integrations/mssql/mssql_test.go index 746c46b9da2..458e8fb2cc0 100644 --- a/warehouse/integrations/mssql/mssql_test.go +++ b/warehouse/integrations/mssql/mssql_test.go @@ -17,27 +17,26 @@ import ( "github.com/rudderlabs/rudder-go-kit/config" "github.com/rudderlabs/rudder-go-kit/filemanager" "github.com/rudderlabs/rudder-go-kit/logger" + + "github.com/rudderlabs/rudder-server/testhelper/backendconfigtest" "github.com/rudderlabs/rudder-server/warehouse/integrations/mssql" mockuploader "github.com/rudderlabs/rudder-server/warehouse/internal/mocks/utils" "github.com/rudderlabs/rudder-server/warehouse/internal/model" "github.com/rudderlabs/compose-test/compose" - "github.com/rudderlabs/rudder-server/testhelper/workspaceConfig" - "github.com/stretchr/testify/require" "github.com/rudderlabs/compose-test/testcompose" kithelper "github.com/rudderlabs/rudder-go-kit/testhelper" - "github.com/rudderlabs/rudder-server/runner" - "github.com/rudderlabs/rudder-server/testhelper/health" + "github.com/rudderlabs/rudder-server/warehouse/client" 
- "github.com/rudderlabs/rudder-server/warehouse/integrations/testhelper" + whth "github.com/rudderlabs/rudder-server/warehouse/integrations/testhelper" "github.com/rudderlabs/rudder-server/warehouse/validations" backendconfig "github.com/rudderlabs/rudder-server/backend-config" "github.com/rudderlabs/rudder-server/utils/misc" - warehouseutils "github.com/rudderlabs/rudder-server/warehouse/utils" + whutils "github.com/rudderlabs/rudder-server/warehouse/utils" ) func TestIntegration(t *testing.T) { @@ -45,155 +44,115 @@ func TestIntegration(t *testing.T) { t.Skip("Skipping tests. Add 'SLOW=1' env var to run test.") } - c := testcompose.New(t, compose.FilePaths([]string{"testdata/docker-compose.yml", "../testdata/docker-compose.jobsdb.yml", "../testdata/docker-compose.minio.yml"})) - c.Start(context.Background()) - misc.Init() validations.Init() - warehouseutils.Init() - - jobsDBPort := c.Port("jobsDb", 5432) - minioPort := c.Port("minio", 9000) - mssqlPort := c.Port("mssql", 1433) - - httpPort, err := kithelper.GetFreePort() - require.NoError(t, err) - - workspaceID := warehouseutils.RandHex() - sourceID := warehouseutils.RandHex() - destinationID := warehouseutils.RandHex() - writeKey := warehouseutils.RandHex() - sourcesSourceID := warehouseutils.RandHex() - sourcesDestinationID := warehouseutils.RandHex() - sourcesWriteKey := warehouseutils.RandHex() + whutils.Init() - destType := warehouseutils.MSSQL - - namespace := testhelper.RandSchema(destType) - sourcesNamespace := testhelper.RandSchema(destType) + destType := whutils.MSSQL host := "localhost" database := "master" user := "SA" password := "reallyStrongPwd123" - bucketName := "testbucket" accessKeyID := "MYACCESSKEY" secretAccessKey := "MYSECRETKEY" region := "us-east-1" - minioEndpoint := fmt.Sprintf("localhost:%d", minioPort) - - templateConfigurations := map[string]any{ - "workspaceID": workspaceID, - "sourceID": sourceID, - "destinationID": destinationID, - "writeKey": writeKey, - "sourcesSourceID": sourcesSourceID, - "sourcesDestinationID": sourcesDestinationID, - "sourcesWriteKey": sourcesWriteKey, - "host": host, - "database": database, - "user": user, - "password": password, - "port": strconv.Itoa(mssqlPort), - "namespace": namespace, - "sourcesNamespace": sourcesNamespace, - "bucketName": bucketName, - "accessKeyID": accessKeyID, - "secretAccessKey": secretAccessKey, - "endPoint": minioEndpoint, - } - workspaceConfigPath := workspaceConfig.CreateTempFile(t, "testdata/template.json", templateConfigurations) - - testhelper.EnhanceWithDefaultEnvs(t) - t.Setenv("JOBS_DB_PORT", strconv.Itoa(jobsDBPort)) - t.Setenv("WAREHOUSE_JOBS_DB_PORT", strconv.Itoa(jobsDBPort)) - t.Setenv("MINIO_ACCESS_KEY_ID", accessKeyID) - t.Setenv("MINIO_SECRET_ACCESS_KEY", secretAccessKey) - t.Setenv("MINIO_MINIO_ENDPOINT", minioEndpoint) - t.Setenv("MINIO_SSL", "false") - t.Setenv("RSERVER_WAREHOUSE_WEB_PORT", strconv.Itoa(httpPort)) - t.Setenv("RSERVER_BACKEND_CONFIG_CONFIG_JSONPATH", workspaceConfigPath) - - svcDone := make(chan struct{}) - - ctx, cancel := context.WithCancel(context.Background()) - defer cancel() - - go func() { - r := runner.New(runner.ReleaseInfo{}) - _ = r.Run(ctx, []string{"mssql-integration-test"}) + t.Run("Events flow", func(t *testing.T) { + httpPort, err := kithelper.GetFreePort() + require.NoError(t, err) - close(svcDone) - }() - t.Cleanup(func() { <-svcDone }) + c := testcompose.New(t, compose.FilePaths([]string{"testdata/docker-compose.yml", "../testdata/docker-compose.jobsdb.yml", 
"../testdata/docker-compose.minio.yml"})) + c.Start(context.Background()) - serviceHealthEndpoint := fmt.Sprintf("http://localhost:%d/health", httpPort) - health.WaitUntilReady(ctx, t, serviceHealthEndpoint, time.Minute, time.Second, "serviceHealthEndpoint") + workspaceID := whutils.RandHex() + jobsDBPort := c.Port("jobsDb", 5432) + mssqlPort := c.Port("mssql", 1433) + minioEndpoint := fmt.Sprintf("localhost:%d", c.Port("minio", 9000)) - t.Run("Events flow", func(t *testing.T) { - t.Setenv("RSERVER_WAREHOUSE_MSSQL_SLOW_QUERY_THRESHOLD", "0s") - t.Setenv("RSERVER_WAREHOUSE_MSSQL_MAX_PARALLEL_LOADS", "8") - t.Setenv("RSERVER_WAREHOUSE_MSSQL_ENABLE_DELETE_BY_JOBS", "true") - - jobsDB := testhelper.JobsDB(t, jobsDBPort) - - dsn := fmt.Sprintf("sqlserver://%s:%s@%s:%d?TrustServerCertificate=true&database=%s&encrypt=disable", - user, - password, - host, - mssqlPort, - database, - ) - db, err := sql.Open("sqlserver", dsn) - require.NoError(t, err) - require.NoError(t, db.Ping()) + jobsDB := whth.JobsDB(t, jobsDBPort) testcase := []struct { name string - writeKey string - schema string - sourceID string - destinationID string tables []string - stagingFilesEventsMap testhelper.EventsCountMap - loadFilesEventsMap testhelper.EventsCountMap - tableUploadsEventsMap testhelper.EventsCountMap - warehouseEventsMap testhelper.EventsCountMap + stagingFilesEventsMap whth.EventsCountMap + loadFilesEventsMap whth.EventsCountMap + tableUploadsEventsMap whth.EventsCountMap + warehouseEventsMap whth.EventsCountMap sourceJob bool stagingFilePrefix string }{ { name: "Upload Job", - writeKey: writeKey, - schema: namespace, tables: []string{"identifies", "users", "tracks", "product_track", "pages", "screens", "aliases", "groups"}, - sourceID: sourceID, - destinationID: destinationID, stagingFilePrefix: "testdata/upload-job", }, { name: "Source Job", - writeKey: sourcesWriteKey, - schema: sourcesNamespace, tables: []string{"tracks", "google_sheet"}, - sourceID: sourcesSourceID, - destinationID: sourcesDestinationID, - stagingFilesEventsMap: testhelper.SourcesStagingFilesEventsMap(), - loadFilesEventsMap: testhelper.SourcesLoadFilesEventsMap(), - tableUploadsEventsMap: testhelper.SourcesTableUploadsEventsMap(), - warehouseEventsMap: testhelper.SourcesWarehouseEventsMap(), + stagingFilesEventsMap: whth.SourcesStagingFilesEventsMap(), + loadFilesEventsMap: whth.SourcesLoadFilesEventsMap(), + tableUploadsEventsMap: whth.SourcesTableUploadsEventsMap(), + warehouseEventsMap: whth.SourcesWarehouseEventsMap(), sourceJob: true, stagingFilePrefix: "testdata/sources-job", }, } for _, tc := range testcase { - tc := tc - t.Run(tc.name, func(t *testing.T) { - t.Parallel() + var ( + sourceID = whutils.RandHex() + destinationID = whutils.RandHex() + writeKey = whutils.RandHex() + namespace = whth.RandSchema(destType) + ) + + destinationBuilder := backendconfigtest.NewDestinationBuilder(destType). + WithID(destinationID). + WithRevisionID(destinationID). + WithConfigOption("host", host). + WithConfigOption("database", database). + WithConfigOption("user", user). + WithConfigOption("password", password). + WithConfigOption("port", strconv.Itoa(mssqlPort)). + WithConfigOption("sslMode", "disable"). + WithConfigOption("namespace", namespace). + WithConfigOption("bucketProvider", whutils.MINIO). + WithConfigOption("bucketName", bucketName). + WithConfigOption("accessKeyID", accessKeyID). + WithConfigOption("secretAccessKey", secretAccessKey). + WithConfigOption("useSSL", false). + WithConfigOption("endPoint", minioEndpoint). 
+ WithConfigOption("useRudderStorage", false). + WithConfigOption("syncFrequency", "30") + + workspaceConfig := backendconfigtest.NewConfigBuilder(). + WithSource( + backendconfigtest.NewSourceBuilder(). + WithID(sourceID). + WithWriteKey(writeKey). + WithWorkspaceID(workspaceID). + WithConnection(destinationBuilder.Build()). + Build(), + ). + WithWorkspaceID(workspaceID). + Build() + + t.Setenv("RSERVER_WAREHOUSE_MSSQL_SLOW_QUERY_THRESHOLD", "0s") + t.Setenv("RSERVER_WAREHOUSE_MSSQL_MAX_PARALLEL_LOADS", "8") + t.Setenv("RSERVER_WAREHOUSE_MSSQL_ENABLE_DELETE_BY_JOBS", "true") + + whth.BootstrapSvc(t, workspaceConfig, httpPort, jobsDBPort) + + dsn := fmt.Sprintf("sqlserver://%s:%s@%s:%d?TrustServerCertificate=true&database=%s&encrypt=disable", + user, password, host, mssqlPort, database, + ) + db, err := sql.Open("sqlserver", dsn) + require.NoError(t, err) + require.NoError(t, db.Ping()) + t.Cleanup(func() { _ = db.Close() }) sqlClient := &client.Client{ SQL: db, @@ -201,7 +160,7 @@ func TestIntegration(t *testing.T) { } conf := map[string]interface{}{ - "bucketProvider": "MINIO", + "bucketProvider": whutils.MINIO, "bucketName": bucketName, "accessKeyID": accessKeyID, "secretAccessKey": secretAccessKey, @@ -211,12 +170,12 @@ func TestIntegration(t *testing.T) { } t.Log("verifying test case 1") - ts1 := testhelper.TestConfig{ - WriteKey: tc.writeKey, - Schema: tc.schema, + ts1 := whth.TestConfig{ + WriteKey: writeKey, + Schema: namespace, Tables: tc.tables, - SourceID: tc.sourceID, - DestinationID: tc.destinationID, + SourceID: sourceID, + DestinationID: destinationID, StagingFilesEventsMap: tc.stagingFilesEventsMap, LoadFilesEventsMap: tc.loadFilesEventsMap, TableUploadsEventsMap: tc.tableUploadsEventsMap, @@ -230,17 +189,17 @@ func TestIntegration(t *testing.T) { JobRunID: misc.FastUUID().String(), TaskRunID: misc.FastUUID().String(), StagingFilePath: tc.stagingFilePrefix + ".staging-1.json", - UserID: testhelper.GetUserId(destType), + UserID: whth.GetUserId(destType), } ts1.VerifyEvents(t) t.Log("verifying test case 2") - ts2 := testhelper.TestConfig{ - WriteKey: tc.writeKey, - Schema: tc.schema, + ts2 := whth.TestConfig{ + WriteKey: writeKey, + Schema: namespace, Tables: tc.tables, - SourceID: tc.sourceID, - DestinationID: tc.destinationID, + SourceID: sourceID, + DestinationID: destinationID, StagingFilesEventsMap: tc.stagingFilesEventsMap, LoadFilesEventsMap: tc.loadFilesEventsMap, TableUploadsEventsMap: tc.tableUploadsEventsMap, @@ -255,7 +214,7 @@ func TestIntegration(t *testing.T) { JobRunID: misc.FastUUID().String(), TaskRunID: misc.FastUUID().String(), StagingFilePath: tc.stagingFilePrefix + ".staging-2.json", - UserID: testhelper.GetUserId(destType), + UserID: whth.GetUserId(destType), } if tc.sourceJob { ts2.UserID = ts1.UserID @@ -266,8 +225,16 @@ func TestIntegration(t *testing.T) { }) t.Run("Validations", func(t *testing.T) { + c := testcompose.New(t, compose.FilePaths([]string{"testdata/docker-compose.yml", "../testdata/docker-compose.minio.yml"})) + c.Start(context.Background()) + + mssqlPort := c.Port("mssql", 1433) + minioEndpoint := fmt.Sprintf("localhost:%d", c.Port("minio", 9000)) + + namespace := whth.RandSchema(destType) + dest := backendconfig.DestinationT{ - ID: destinationID, + ID: "test_destination_id", Config: map[string]interface{}{ "host": host, "database": database, @@ -275,8 +242,8 @@ func TestIntegration(t *testing.T) { "password": password, "port": strconv.Itoa(mssqlPort), "sslMode": "disable", - "namespace": "", - "bucketProvider": "MINIO", + "namespace": 
namespace, + "bucketProvider": whutils.MINIO, "bucketName": bucketName, "accessKeyID": accessKeyID, "secretAccessKey": secretAccessKey, @@ -292,19 +259,20 @@ func TestIntegration(t *testing.T) { }, Name: "mssql-demo", Enabled: true, - RevisionID: destinationID, + RevisionID: "test_destination_id", } - testhelper.VerifyConfigurationTest(t, dest) + whth.VerifyConfigurationTest(t, dest) }) t.Run("Load Table", func(t *testing.T) { - const ( - sourceID = "test_source_id" - destinationID = "test_destination_id" - workspaceID = "test_workspace_id" - ) + c := testcompose.New(t, compose.FilePaths([]string{"testdata/docker-compose.yml", "../testdata/docker-compose.minio.yml"})) + c.Start(context.Background()) + + mssqlPort := c.Port("mssql", 1433) + minioEndpoint := fmt.Sprintf("localhost:%d", c.Port("minio", 9000)) - namespace := testhelper.RandSchema(destType) + ctx := context.Background() + namespace := whth.RandSchema(destType) schemaInUpload := model.TableSchema{ "test_bool": "boolean", @@ -332,10 +300,10 @@ func TestIntegration(t *testing.T) { warehouse := model.Warehouse{ Source: backendconfig.SourceT{ - ID: sourceID, + ID: "test_source_id", }, Destination: backendconfig.DestinationT{ - ID: destinationID, + ID: "test_destination_id", DestinationDefinition: backendconfig.DestinationDefinitionT{ Name: destType, }, @@ -347,7 +315,7 @@ func TestIntegration(t *testing.T) { "port": strconv.Itoa(mssqlPort), "sslMode": "disable", "namespace": "", - "bucketProvider": "MINIO", + "bucketProvider": whutils.MINIO, "bucketName": bucketName, "accessKeyID": accessKeyID, "secretAccessKey": secretAccessKey, @@ -357,12 +325,12 @@ func TestIntegration(t *testing.T) { "useRudderStorage": false, }, }, - WorkspaceID: workspaceID, + WorkspaceID: "test_workspace_id", Namespace: namespace, } fm, err := filemanager.New(&filemanager.Settings{ - Provider: warehouseutils.MINIO, + Provider: whutils.MINIO, Config: map[string]any{ "bucketName": bucketName, "accessKeyID": accessKeyID, @@ -373,7 +341,7 @@ func TestIntegration(t *testing.T) { "disableSSL": true, "region": region, "enableSSE": false, - "bucketProvider": warehouseutils.MINIO, + "bucketProvider": whutils.MINIO, }, }) require.NoError(t, err) @@ -381,9 +349,9 @@ func TestIntegration(t *testing.T) { t.Run("schema does not exists", func(t *testing.T) { tableName := "schema_not_exists_test_table" - uploadOutput := testhelper.UploadLoadFile(t, fm, "../testdata/load.csv.gz", tableName) + uploadOutput := whth.UploadLoadFile(t, fm, "../testdata/load.csv.gz", tableName) - loadFiles := []warehouseutils.LoadFile{{Location: uploadOutput.Location}} + loadFiles := []whutils.LoadFile{{Location: uploadOutput.Location}} mockUploader := newMockUploader(t, loadFiles, tableName, schemaInUpload, schemaInWarehouse) ms := mssql.New(config.New(), logger.NOP, stats.NOP) @@ -397,9 +365,9 @@ func TestIntegration(t *testing.T) { t.Run("table does not exists", func(t *testing.T) { tableName := "table_not_exists_test_table" - uploadOutput := testhelper.UploadLoadFile(t, fm, "../testdata/load.csv.gz", tableName) + uploadOutput := whth.UploadLoadFile(t, fm, "../testdata/load.csv.gz", tableName) - loadFiles := []warehouseutils.LoadFile{{Location: uploadOutput.Location}} + loadFiles := []whutils.LoadFile{{Location: uploadOutput.Location}} mockUploader := newMockUploader(t, loadFiles, tableName, schemaInUpload, schemaInWarehouse) ms := mssql.New(config.New(), logger.NOP, stats.NOP) @@ -417,9 +385,9 @@ func TestIntegration(t *testing.T) { tableName := "merge_test_table" t.Run("without dedup", func(t 
*testing.T) { - uploadOutput := testhelper.UploadLoadFile(t, fm, "../testdata/load.csv.gz", tableName) + uploadOutput := whth.UploadLoadFile(t, fm, "../testdata/load.csv.gz", tableName) - loadFiles := []warehouseutils.LoadFile{{Location: uploadOutput.Location}} + loadFiles := []whutils.LoadFile{{Location: uploadOutput.Location}} mockUploader := newMockUploader(t, loadFiles, tableName, schemaInUpload, schemaInWarehouse) ms := mssql.New(config.New(), logger.NOP, stats.NOP) @@ -442,7 +410,7 @@ func TestIntegration(t *testing.T) { require.Equal(t, loadTableStat.RowsInserted, int64(0)) require.Equal(t, loadTableStat.RowsUpdated, int64(14)) - records := testhelper.RetrieveRecordsFromWarehouse(t, ms.DB.DB, + records := whth.RetrieveRecordsFromWarehouse(t, ms.DB.DB, fmt.Sprintf(` SELECT id, @@ -461,12 +429,12 @@ func TestIntegration(t *testing.T) { tableName, ), ) - require.Equal(t, records, testhelper.SampleTestRecords()) + require.Equal(t, records, whth.SampleTestRecords()) }) t.Run("with dedup", func(t *testing.T) { - uploadOutput := testhelper.UploadLoadFile(t, fm, "../testdata/dedup.csv.gz", tableName) + uploadOutput := whth.UploadLoadFile(t, fm, "../testdata/dedup.csv.gz", tableName) - loadFiles := []warehouseutils.LoadFile{{Location: uploadOutput.Location}} + loadFiles := []whutils.LoadFile{{Location: uploadOutput.Location}} mockUploader := newMockUploader(t, loadFiles, tableName, schemaInUpload, schemaInWarehouse) ms := mssql.New(config.New(), logger.NOP, stats.NOP) @@ -484,7 +452,7 @@ func TestIntegration(t *testing.T) { require.Equal(t, loadTableStat.RowsInserted, int64(0)) require.Equal(t, loadTableStat.RowsUpdated, int64(14)) - records := testhelper.RetrieveRecordsFromWarehouse(t, ms.DB.DB, + records := whth.RetrieveRecordsFromWarehouse(t, ms.DB.DB, fmt.Sprintf(` SELECT id, @@ -503,13 +471,13 @@ func TestIntegration(t *testing.T) { tableName, ), ) - require.Equal(t, records, testhelper.DedupTestRecords()) + require.Equal(t, records, whth.DedupTestRecords()) }) }) t.Run("load file does not exists", func(t *testing.T) { tableName := "load_file_not_exists_test_table" - loadFiles := []warehouseutils.LoadFile{{ + loadFiles := []whutils.LoadFile{{ Location: "http://localhost:1234/testbucket/rudder-warehouse-load-objects/load_file_not_exists_test_table/test_source_id/f31af97e-03e8-46d0-8a1a-1786cb85b22c-load_file_not_exists_test_table/load.csv.gz", }} mockUploader := newMockUploader(t, loadFiles, tableName, schemaInUpload, schemaInWarehouse) @@ -531,9 +499,9 @@ func TestIntegration(t *testing.T) { t.Run("mismatch in number of columns", func(t *testing.T) { tableName := "mismatch_columns_test_table" - uploadOutput := testhelper.UploadLoadFile(t, fm, "../testdata/mismatch-columns.csv.gz", tableName) + uploadOutput := whth.UploadLoadFile(t, fm, "../testdata/mismatch-columns.csv.gz", tableName) - loadFiles := []warehouseutils.LoadFile{{Location: uploadOutput.Location}} + loadFiles := []whutils.LoadFile{{Location: uploadOutput.Location}} mockUploader := newMockUploader(t, loadFiles, tableName, schemaInUpload, schemaInWarehouse) ms := mssql.New(config.New(), logger.NOP, stats.NOP) @@ -553,9 +521,9 @@ func TestIntegration(t *testing.T) { t.Run("mismatch in schema", func(t *testing.T) { tableName := "mismatch_schema_test_table" - uploadOutput := testhelper.UploadLoadFile(t, fm, "../testdata/mismatch-schema.csv.gz", tableName) + uploadOutput := whth.UploadLoadFile(t, fm, "../testdata/mismatch-schema.csv.gz", tableName) - loadFiles := []warehouseutils.LoadFile{{Location: uploadOutput.Location}} + 
loadFiles := []whutils.LoadFile{{Location: uploadOutput.Location}} mockUploader := newMockUploader(t, loadFiles, tableName, schemaInUpload, schemaInWarehouse) ms := mssql.New(config.New(), logger.NOP, stats.NOP) @@ -573,7 +541,7 @@ func TestIntegration(t *testing.T) { require.Equal(t, loadTableStat.RowsInserted, int64(14)) require.Equal(t, loadTableStat.RowsUpdated, int64(0)) - records := testhelper.RetrieveRecordsFromWarehouse(t, ms.DB.DB, + records := whth.RetrieveRecordsFromWarehouse(t, ms.DB.DB, fmt.Sprintf(` SELECT id, @@ -592,15 +560,15 @@ func TestIntegration(t *testing.T) { tableName, ), ) - require.Equal(t, records, testhelper.MismatchSchemaTestRecords()) + require.Equal(t, records, whth.MismatchSchemaTestRecords()) }) t.Run("discards", func(t *testing.T) { - tableName := warehouseutils.DiscardsTable + tableName := whutils.DiscardsTable - uploadOutput := testhelper.UploadLoadFile(t, fm, "../testdata/discards.csv.gz", tableName) + uploadOutput := whth.UploadLoadFile(t, fm, "../testdata/discards.csv.gz", tableName) - loadFiles := []warehouseutils.LoadFile{{Location: uploadOutput.Location}} - mockUploader := newMockUploader(t, loadFiles, tableName, warehouseutils.DiscardsSchema, warehouseutils.DiscardsSchema) + loadFiles := []whutils.LoadFile{{Location: uploadOutput.Location}} + mockUploader := newMockUploader(t, loadFiles, tableName, whutils.DiscardsSchema, whutils.DiscardsSchema) ms := mssql.New(config.New(), logger.NOP, stats.NOP) err := ms.Setup(ctx, warehouse, mockUploader) @@ -609,7 +577,7 @@ func TestIntegration(t *testing.T) { err = ms.CreateSchema(ctx) require.NoError(t, err) - err = ms.CreateTable(ctx, tableName, warehouseutils.DiscardsSchema) + err = ms.CreateTable(ctx, tableName, whutils.DiscardsSchema) require.NoError(t, err) loadTableStat, err := ms.LoadTable(ctx, tableName) @@ -617,7 +585,7 @@ func TestIntegration(t *testing.T) { require.Equal(t, loadTableStat.RowsInserted, int64(6)) require.Equal(t, loadTableStat.RowsUpdated, int64(0)) - records := testhelper.RetrieveRecordsFromWarehouse(t, ms.DB.DB, + records := whth.RetrieveRecordsFromWarehouse(t, ms.DB.DB, fmt.Sprintf(` SELECT column_name, @@ -634,7 +602,7 @@ func TestIntegration(t *testing.T) { tableName, ), ) - require.Equal(t, records, testhelper.DiscardTestRecords()) + require.Equal(t, records, whth.DiscardTestRecords()) }) }) } @@ -732,11 +700,11 @@ func TestMSSQL_ProcessColumnValue(t *testing.T) { func newMockUploader( t testing.TB, - loadFiles []warehouseutils.LoadFile, + loadFiles []whutils.LoadFile, tableName string, schemaInUpload model.TableSchema, schemaInWarehouse model.TableSchema, -) warehouseutils.Uploader { +) whutils.Uploader { ctrl := gomock.NewController(t) t.Cleanup(ctrl.Finish) diff --git a/warehouse/integrations/mssql/testdata/template.json b/warehouse/integrations/mssql/testdata/template.json deleted file mode 100644 index 804a4e01c58..00000000000 --- a/warehouse/integrations/mssql/testdata/template.json +++ /dev/null @@ -1,275 +0,0 @@ -{ - "enableMetrics": false, - "workspaceId": "{{.workspaceID}}", - "sources": [ - { - "config": { - "eventUpload": false, - "eventUploadTS": 1637229453729 - }, - "id": "{{.sourceID}}", - "name": "mssql-integration", - "writeKey": "{{.writeKey}}", - "enabled": true, - "sourceDefinitionId": "1TW3fuvuaZqJs877OEailT17KzZ", - "createdBy": "1wLg8l6vAj2TuUUMIIBKL4nsVOT", - "workspaceId": "{{.workspaceID}}", - "deleted": false, - "createdAt": "2021-08-08T14:49:21.580Z", - "updatedAt": "2021-11-18T09:57:33.742Z", - "destinations": [ - { - "config": { - "host": 
"{{.host}}", - "database": "{{.database}}", - "user": "{{.user}}", - "password": "{{.password}}", - "port": "{{.port}}", - "sslMode": "disable", - "namespace": "{{.namespace}}", - "bucketProvider": "MINIO", - "bucketName": "{{.bucketName}}", - "accessKeyID": "{{.accessKeyID}}", - "secretAccessKey": "{{.secretAccessKey}}", - "useSSL": false, - "endPoint": "{{.endPoint}}", - "syncFrequency": "30", - "useRudderStorage": false - }, - "secretConfig": {}, - "id": "{{.destinationID}}", - "name": "mssql-demo", - "enabled": true, - "workspaceId": "{{.workspaceID}}", - "deleted": false, - "createdAt": "2021-11-21T18:58:44.286Z", - "updatedAt": "2021-11-21T18:58:44.286Z", - "revisionId": "{{.destinationID}}", - "transformations": [], - "destinationDefinition": { - "config": { - "destConfig": { - "defaultConfig": [ - "host", - "database", - "user", - "password", - "port", - "sslMode", - "namespace", - "bucketProvider", - "bucketName", - "accessKeyID", - "accessKey", - "accountName", - "accountKey", - "credentials", - "secretAccessKey", - "useSSL", - "containerName", - "endPoint", - "syncFrequency", - "syncStartAt", - "excludeWindow", - "useRudderStorage" - ] - }, - "secretKeys": [ - "password", - "accessKeyID", - "accessKey", - "accountKey", - "secretAccessKey", - "credentials" - ], - "excludeKeys": [], - "includeKeys": [], - "transformAt": "processor", - "transformAtV1": "processor", - "supportedSourceTypes": [ - "android", - "ios", - "web", - "unity", - "amp", - "cloud", - "reactnative", - "cloudSource", - "flutter", - "cordova" - ], - "saveDestinationResponse": true - }, - "responseRules": {}, - "id": "1qvbUYC2xVQ7lvI9UUYkkM4IBt9", - "name": "MSSQL", - "displayName": "Microsoft SQL Server", - "category": "warehouse", - "createdAt": "2021-04-09T10:10:26.589Z", - "updatedAt": "2021-11-11T07:55:15.622Z" - }, - "isConnectionEnabled": true, - "isProcessorEnabled": true - } - ], - "sourceDefinition": { - "options": null, - "id": "1TW3fuvuaZqJs877OEailT17KzZ", - "name": "Javascript", - "displayName": "Javascript", - "category": null, - "createdAt": "2019-11-12T12:35:30.464Z", - "updatedAt": "2021-09-28T02:27:30.373Z" - }, - "dgSourceTrackingPlanConfig": null - }, - { - "config": { - "config": { - "row_batch_size": 200, - "credentials": { - "auth_type": "Client", - "accountId": "29hOyXzmdF9rz7yR2FTq4pohyXL" - }, - "spreadsheet_id": "1bKQpN-KkhYZd4eqUUoq3Tec6HrJzgqSc8jwVvajnpk8" - }, - "schedule": { - "type": "manual", - "every": 0, - "unit": "minutes" - }, - "prefix": "SGS5" - }, - "liveEventsConfig": {}, - "id": "{{.sourcesSourceID}}", - "name": "mssql-sources-integration", - "writeKey": "{{.sourcesWriteKey}}", - "enabled": true, - "sourceDefinitionId": "29seNpaVfhMp7YVpiBUszPOvmO1", - "createdBy": "279BPpjT6BGqKKhT5qAZuUVZa1h", - "workspaceId": "{{.workspaceID}}", - "deleted": false, - "transient": false, - "secretVersion": null, - "createdAt": "2022-08-23T00:21:18.366Z", - "updatedAt": "2022-08-23T00:21:18.366Z", - "sourceDefinition": { - "options": { - "auth": { - "provider": "Google", - "oauthRole": "google_sheets" - }, - "image": "source-google-sheets:v2", - "isBeta": true - }, - "id": "29seNpaVfhMp7YVpiBUszPOvmO1", - "name": "singer-google-sheets", - "displayName": "Singer Google Sheets", - "category": "singer-protocol", - "createdAt": "2022-05-30T04:53:02.188Z", - "updatedAt": "2022-05-30T04:53:02.188Z" - }, - "destinations": [ - { - "config": { - "host": "{{.host}}", - "database": "{{.database}}", - "user": "{{.user}}", - "password": "{{.password}}", - "port": "{{.port}}", - "sslMode": "disable", 
- "namespace": "{{.sourcesNamespace}}", - "bucketProvider": "MINIO", - "bucketName": "{{.bucketName}}", - "accessKeyID": "{{.accessKeyID}}", - "secretAccessKey": "{{.secretAccessKey}}", - "useSSL": false, - "endPoint": "{{.endPoint}}", - "syncFrequency": "30", - "useRudderStorage": false - }, - "secretConfig": {}, - "id": "{{.sourcesDestinationID}}", - "name": "mssql-sources-demo", - "enabled": true, - "workspaceId": "{{.workspaceID}}", - "deleted": false, - "createdAt": "2021-11-21T18:58:44.286Z", - "updatedAt": "2021-11-21T18:58:44.286Z", - "revisionId": "{{.sourcesDestinationID}}", - "transformations": [], - "destinationDefinition": { - "config": { - "destConfig": { - "defaultConfig": [ - "host", - "database", - "user", - "password", - "port", - "sslMode", - "namespace", - "bucketProvider", - "bucketName", - "accessKeyID", - "accessKey", - "accountName", - "accountKey", - "credentials", - "secretAccessKey", - "useSSL", - "containerName", - "endPoint", - "syncFrequency", - "syncStartAt", - "excludeWindow", - "useRudderStorage" - ] - }, - "secretKeys": [ - "password", - "accessKeyID", - "accessKey", - "accountKey", - "secretAccessKey", - "credentials" - ], - "excludeKeys": [], - "includeKeys": [], - "transformAt": "processor", - "transformAtV1": "processor", - "supportedSourceTypes": [ - "android", - "ios", - "web", - "unity", - "amp", - "cloud", - "reactnative", - "cloudSource", - "flutter", - "cordova" - ], - "saveDestinationResponse": true - }, - "responseRules": {}, - "id": "1qvbUYC2xVQ7lvI9UUYkkM4IBt9", - "name": "MSSQL", - "displayName": "Microsoft SQL Server", - "category": "warehouse", - "createdAt": "2021-04-09T10:10:26.589Z", - "updatedAt": "2021-11-11T07:55:15.622Z" - }, - "isConnectionEnabled": true, - "isProcessorEnabled": true - } - ], - "dgSourceTrackingPlanConfig": null - } - ], - "libraries": [ - { - "versionId": "23Uxw7QEiOg8e0KkQV8LmNfWaWh" - } - ] -} diff --git a/warehouse/integrations/postgres/postgres_test.go b/warehouse/integrations/postgres/postgres_test.go index e026125f4fa..a7b92c7e548 100644 --- a/warehouse/integrations/postgres/postgres_test.go +++ b/warehouse/integrations/postgres/postgres_test.go @@ -12,6 +12,8 @@ import ( "github.com/lib/pq" + kithelper "github.com/rudderlabs/rudder-go-kit/testhelper" + "github.com/rudderlabs/rudder-go-kit/stats" "github.com/stretchr/testify/require" @@ -22,13 +24,10 @@ import ( "github.com/rudderlabs/rudder-go-kit/config" "github.com/rudderlabs/rudder-go-kit/filemanager" "github.com/rudderlabs/rudder-go-kit/logger" - kithelper "github.com/rudderlabs/rudder-go-kit/testhelper" backendconfig "github.com/rudderlabs/rudder-server/backend-config" - "github.com/rudderlabs/rudder-server/runner" th "github.com/rudderlabs/rudder-server/testhelper" - "github.com/rudderlabs/rudder-server/testhelper/health" - "github.com/rudderlabs/rudder-server/testhelper/workspaceConfig" + "github.com/rudderlabs/rudder-server/testhelper/backendconfigtest" "github.com/rudderlabs/rudder-server/utils/misc" "github.com/rudderlabs/rudder-server/warehouse/client" "github.com/rudderlabs/rudder-server/warehouse/integrations/postgres" @@ -36,7 +35,7 @@ import ( "github.com/rudderlabs/rudder-server/warehouse/integrations/tunnelling" mockuploader "github.com/rudderlabs/rudder-server/warehouse/internal/mocks/utils" "github.com/rudderlabs/rudder-server/warehouse/internal/model" - warehouseutils "github.com/rudderlabs/rudder-server/warehouse/utils" + whutils "github.com/rudderlabs/rudder-server/warehouse/utils" 
"github.com/rudderlabs/rudder-server/warehouse/validations" ) @@ -45,160 +44,37 @@ func TestIntegration(t *testing.T) { t.Skip("Skipping tests. Add 'SLOW=1' env var to run test.") } - c := testcompose.New(t, compose.FilePaths([]string{ - "testdata/docker-compose.postgres.yml", - "testdata/docker-compose.ssh-server.yml", - "testdata/docker-compose.replication.yml", - "../testdata/docker-compose.jobsdb.yml", - "../testdata/docker-compose.minio.yml", - })) - c.Start(context.Background()) - misc.Init() validations.Init() - warehouseutils.Init() - - jobsDBPort := c.Port("jobsDb", 5432) - minioPort := c.Port("minio", 9000) - postgresPort := c.Port("postgres", 5432) - sshPort := c.Port("ssh-server", 2222) - primaryDBPort := c.Port("primary", 5432) - standbyDBPort := c.Port("standby", 5432) - - httpPort, err := kithelper.GetFreePort() - require.NoError(t, err) - - workspaceID := warehouseutils.RandHex() - sourceID := warehouseutils.RandHex() - destinationID := warehouseutils.RandHex() - writeKey := warehouseutils.RandHex() - sourcesSourceID := warehouseutils.RandHex() - sourcesDestinationID := warehouseutils.RandHex() - sourcesWriteKey := warehouseutils.RandHex() - tunnelledWriteKey := warehouseutils.RandHex() - tunnelledSourceID := warehouseutils.RandHex() - tunnelledDestinationID := warehouseutils.RandHex() - - destType := warehouseutils.POSTGRES - - namespace := whth.RandSchema(destType) - sourcesNamespace := whth.RandSchema(destType) - tunnelledNamespace := whth.RandSchema(destType) + whutils.Init() + + destType := whutils.POSTGRES host := "localhost" database := "rudderdb" user := "rudder" password := "rudder-password" - - tunnelledHost := "db-private-postgres" - tunnelledDatabase := "postgres" - tunnelledPassword := "postgres" - tunnelledUser := "postgres" - tunnelledPort := "5432" - - tunnelledSSHUser := "rudderstack" - tunnelledSSHHost := "localhost" - tunnelledPrivateKey := "-----BEGIN OPENSSH PRIVATE 
KEY-----\\nb3BlbnNzaC1rZXktdjEAAAAABG5vbmUAAAAEbm9uZQAAAAAAAAABAAABlwAAAAdzc2gtcn\\nNhAAAAAwEAAQAAAYEA0f/mqkkZ3c9qw8MTz5FoEO3PGecO/dtUFfJ4g1UBu9E7hi/pyVYY\\nfLfdsd5bqA2pXdU0ROymyVe683I1VzJcihUtwB1eQxP1mUhmoo0ixK0IUUGm4PRieCGv+r\\n0/gMvaYbVGUPCi5tAUVh02vZB7p2cTIaz872lvCnRhYbhGUHSbhNSSQOjnCtZfjuZZnE0l\\nPKjWV/wbJ7Pvoc/FZMlWOqL1AjAKuwFH5zs1RMrPDDv5PCZksq4a7DDxziEdq39jvA3sOm\\npQXvzBBBLBOzu7rM3/MPJb6dvAGJcYxkptfL4YXTscIMINr0g24cn+Thvt9yqA93rkb9RB\\nkw6RIEwMlQKqserA+pfsaoW0SkvnlDKzS1DLwXioL4Uc1Jpr/9jTMEfR+W7v7gJPB1JDnV\\ngen5FBfiMqbsG1amUS+mjgNfC8I00tR+CUHxpqUWANtcWTinhSnLJ2skj/2QnciPHkHurR\\nEKyEwCVecgn+xVKyRgVDCGsJ+QnAdn51+i/kO3nvAAAFqENNbN9DTWzfAAAAB3NzaC1yc2\\nEAAAGBANH/5qpJGd3PasPDE8+RaBDtzxnnDv3bVBXyeINVAbvRO4Yv6clWGHy33bHeW6gN\\nqV3VNETspslXuvNyNVcyXIoVLcAdXkMT9ZlIZqKNIsStCFFBpuD0Ynghr/q9P4DL2mG1Rl\\nDwoubQFFYdNr2Qe6dnEyGs/O9pbwp0YWG4RlB0m4TUkkDo5wrWX47mWZxNJTyo1lf8Gyez\\n76HPxWTJVjqi9QIwCrsBR+c7NUTKzww7+TwmZLKuGuww8c4hHat/Y7wN7DpqUF78wQQSwT\\ns7u6zN/zDyW+nbwBiXGMZKbXy+GF07HCDCDa9INuHJ/k4b7fcqgPd65G/UQZMOkSBMDJUC\\nqrHqwPqX7GqFtEpL55Qys0tQy8F4qC+FHNSaa//Y0zBH0flu7+4CTwdSQ51YHp+RQX4jKm\\n7BtWplEvpo4DXwvCNNLUfglB8aalFgDbXFk4p4UpyydrJI/9kJ3Ijx5B7q0RCshMAlXnIJ\\n/sVSskYFQwhrCfkJwHZ+dfov5Dt57wAAAAMBAAEAAAGAd9pxr+ag2LO0353LBMCcgGz5sn\\nLpX4F6cDw/A9XUc3lrW56k88AroaLe6NFbxoJlk6RHfL8EQg3MKX2Za/bWUgjcX7VjQy11\\nEtL7oPKkUVPgV1/8+o8AVEgFxDmWsM+oB/QJ+dAdaVaBBNUPlQmNSXHOvX2ZrpqiQXlCyx\\n79IpYq3JjmEB3dH5ZSW6CkrExrYD+MdhLw/Kv5rISEyI0Qpc6zv1fkB+8nNpXYRTbrDLR9\\n/xJ6jnBH9V3J5DeKU4MUQ39nrAp6iviyWydB973+MOygpy41fXO6hHyVZ2aSCysn1t6J/K\\nQdeEjqAOI/5CbdtiFGp06et799EFyzPItW0FKetW1UTOL2YHqdb+Q9sNjiNlUSzgxMbJWJ\\nRGO6g9B1mJsHl5mJZUiHQPsG/wgBER8VOP4bLOEB6gzVO2GE9HTJTOh5C+eEfrl52wPfXj\\nTqjtWAnhssxtgmWjkS0ibi+u1KMVXKHfaiqJ7nH0jMx+eu1RpMvuR8JqkU8qdMMGChAAAA\\nwHkQMfpCnjNAo6sllEB5FwjEdTBBOt7gu6nLQ2O3uGv0KNEEZ/BWJLQ5fKOfBtDHO+kl+5\\nQoxc0cE7cg64CyBF3+VjzrEzuX5Tuh4NwrsjT4vTTHhCIbIynxEPmKzvIyCMuglqd/nhu9\\n6CXhghuTg8NrC7lY+cImiBfhxE32zqNITlpHW7exr95Gz1sML2TRJqxDN93oUFfrEuInx8\\nHpXXnvMQxPRhcp9nDMU9/ahUamMabQqVVMwKDi8n3sPPzTiAAAAMEA+/hm3X/yNotAtMAH\\ny11parKQwPgEF4HYkSE0bEe+2MPJmEk4M4PGmmt/MQC5N5dXdUGxiQeVMR+Sw0kN9qZjM6\\nSIz0YHQFMsxVmUMKFpAh4UI0GlsW49jSpVXs34Fg95AfhZOYZmOcGcYosp0huCeRlpLeIH\\n7Vv2bkfQaic3uNaVPg7+cXg7zdY6tZlzwa/4Fj0udfTjGQJOPSzIihdMLHnV81rZ2cUOZq\\nMSk6b02aMpVB4TV0l1w4j2mlF2eGD9AAAAwQDVW6p2VXKuPR7SgGGQgHXpAQCFZPGLYd8K\\nduRaCbxKJXzUnZBn53OX5fuLlFhmRmAMXE6ztHPN1/5JjwILn+O49qel1uUvzU8TaWioq7\\nAre3SJR2ZucR4AKUvzUHGP3GWW96xPN8lq+rgb0th1eOSU2aVkaIdeTJhV1iPfaUUf+15S\\nYcJlSHLGgeqkok+VfuudZ73f3RFFhjoe1oAjlPB4leeMsBD9UBLx2U3xAevnfkecF4Lm83\\n4sVswWATSFAFsAAAAsYWJoaW1hbnl1YmFiYmFyQEFiaGltYW55dXMtTWFjQm9vay1Qcm8u\\nbG9jYWwBAgMEBQYH\\n-----END OPENSSH PRIVATE KEY-----" - bucketName := "testbucket" accessKeyID := "MYACCESSKEY" secretAccessKey := "MYSECRETKEY" region := "us-east-1" - minioEndpoint := fmt.Sprintf("localhost:%d", minioPort) - - bootstrapSvc := func(t testing.TB, additionalEnvs map[string]string, preferAppend *bool) { - var preferAppendStr string - if preferAppend != nil { - preferAppendStr = fmt.Sprintf(`"preferAppend": %v,`, *preferAppend) - } - templateConfigurations := map[string]any{ - "workspaceID": workspaceID, - "sourceID": sourceID, - "destinationID": destinationID, - "writeKey": writeKey, - "sourcesSourceID": sourcesSourceID, - "sourcesDestinationID": sourcesDestinationID, - "sourcesWriteKey": sourcesWriteKey, - "tunnelledWriteKey": tunnelledWriteKey, - "tunnelledSourceID": tunnelledSourceID, - "tunnelledDestinationID": tunnelledDestinationID, - "host": host, - "database": database, - "user": 
user, - "password": password, - "port": strconv.Itoa(postgresPort), - "namespace": namespace, - "sourcesNamespace": sourcesNamespace, - "tunnelledNamespace": tunnelledNamespace, - "tunnelledSSHUser": tunnelledSSHUser, - "tunnelledSSHPort": strconv.Itoa(sshPort), - "tunnelledSSHHost": tunnelledSSHHost, - "tunnelledPrivateKey": tunnelledPrivateKey, - "tunnelledHost": tunnelledHost, - "tunnelledDatabase": tunnelledDatabase, - "tunnelledPort": tunnelledPort, - "tunnelledUser": tunnelledUser, - "tunnelledPassword": tunnelledPassword, - "bucketName": bucketName, - "accessKeyID": accessKeyID, - "secretAccessKey": secretAccessKey, - "endPoint": minioEndpoint, - "preferAppend": preferAppendStr, - } - workspaceConfigPath := workspaceConfig.CreateTempFile(t, "testdata/template.json", templateConfigurations) - - whth.EnhanceWithDefaultEnvs(t) - t.Setenv("JOBS_DB_PORT", strconv.Itoa(jobsDBPort)) - t.Setenv("WAREHOUSE_JOBS_DB_PORT", strconv.Itoa(jobsDBPort)) - t.Setenv("MINIO_ACCESS_KEY_ID", accessKeyID) - t.Setenv("MINIO_SECRET_ACCESS_KEY", secretAccessKey) - t.Setenv("MINIO_MINIO_ENDPOINT", minioEndpoint) - t.Setenv("MINIO_SSL", "false") - t.Setenv("RSERVER_WAREHOUSE_WEB_PORT", strconv.Itoa(httpPort)) - t.Setenv("RSERVER_BACKEND_CONFIG_CONFIG_JSONPATH", workspaceConfigPath) - t.Setenv("RSERVER_WAREHOUSE_POSTGRES_MAX_PARALLEL_LOADS", "8") - t.Setenv("RSERVER_WAREHOUSE_POSTGRES_SKIP_COMPUTING_USER_LATEST_TRAITS_WORKSPACE_IDS", workspaceID) - t.Setenv("RSERVER_WAREHOUSE_POSTGRES_ENABLE_SQLSTATEMENT_EXECUTION_PLAN_WORKSPACE_IDS", workspaceID) - t.Setenv("RSERVER_WAREHOUSE_POSTGRES_ENABLE_DELETE_BY_JOBS", "true") - t.Setenv("RSERVER_WAREHOUSE_POSTGRES_ENABLE_DELETE_BY_JOBS", "true") - for envKey, envValue := range additionalEnvs { - t.Setenv(envKey, envValue) - } - - svcDone := make(chan struct{}) - - ctx, cancel := context.WithCancel(context.Background()) - go func() { - r := runner.New(runner.ReleaseInfo{}) - _ = r.Run(ctx, []string{"postgres-integration-test"}) - - close(svcDone) - }() - t.Cleanup(func() { <-svcDone }) - t.Cleanup(cancel) - - serviceHealthEndpoint := fmt.Sprintf("http://localhost:%d/health", httpPort) - health.WaitUntilReady(ctx, t, serviceHealthEndpoint, time.Minute, time.Second, "serviceHealthEndpoint") - } - t.Run("Events flow", func(t *testing.T) { - dsn := fmt.Sprintf( - "postgres://%s:%s@%s:%s/%s?sslmode=disable", - "rudder", "rudder-password", "localhost", strconv.Itoa(postgresPort), "rudderdb", - ) - db, err := sql.Open("postgres", dsn) + httpPort, err := kithelper.GetFreePort() require.NoError(t, err) - require.NoError(t, db.Ping()) + + c := testcompose.New(t, compose.FilePaths([]string{"testdata/docker-compose.postgres.yml", "../testdata/docker-compose.jobsdb.yml", "../testdata/docker-compose.minio.yml"})) + c.Start(context.Background()) + + workspaceID := whutils.RandHex() + jobsDBPort := c.Port("jobsDb", 5432) + postgresPort := c.Port("postgres", 5432) + minioEndpoint := fmt.Sprintf("localhost:%d", c.Port("minio", 9000)) jobsDB := whth.JobsDB(t, jobsDBPort) testCases := []struct { name string - writeKey string - schema string - sourceID string - destinationID string tables []string stagingFilesEventsMap whth.EventsCountMap loadFilesEventsMap whth.EventsCountMap @@ -207,32 +83,24 @@ func TestIntegration(t *testing.T) { warehouseEventsMap2 whth.EventsCountMap sourceJob bool stagingFilePrefix string - preferAppend *bool jobRunID string useSameUserID bool - additionalEnvs map[string]string + additionalEnvs func(destinationID string) map[string]string + configOverride 
map[string]any }{ { - name: "Upload Job", - writeKey: writeKey, - schema: namespace, + name: "Upload Job", tables: []string{ "identifies", "users", "tracks", "product_track", "pages", "screens", "aliases", "groups", }, - sourceID: sourceID, - destinationID: destinationID, stagingFilePrefix: "testdata/upload-job", jobRunID: misc.FastUUID().String(), }, { - name: "Append Mode", - writeKey: writeKey, + name: "Append Mode", tables: []string{ "identifies", "users", "tracks", "product_track", "pages", "screens", "aliases", "groups", }, - schema: namespace, - sourceID: sourceID, - destinationID: destinationID, warehouseEventsMap2: whth.EventsCountMap{ "identifies": 8, "users": 1, @@ -243,7 +111,9 @@ func TestIntegration(t *testing.T) { "aliases": 8, "groups": 8, }, - preferAppend: th.Ptr(true), + configOverride: map[string]any{ + "preferAppend": true, + }, stagingFilePrefix: "testdata/upload-job-append-mode", // an empty jobRunID means that the source is not an ETL one // see Uploader.CanAppend() @@ -251,14 +121,10 @@ func TestIntegration(t *testing.T) { useSameUserID: true, }, { - name: "Undefined preferAppend", - writeKey: writeKey, + name: "Undefined preferAppend", tables: []string{ "identifies", "users", "tracks", "product_track", "pages", "screens", "aliases", "groups", }, - schema: namespace, - sourceID: sourceID, - destinationID: destinationID, warehouseEventsMap2: whth.EventsCountMap{ // let's use the same data as "testdata/upload-job-append-mode" // but then for the 2nd sync we expect 4 for each table instead of 8 due to the merge @@ -271,7 +137,6 @@ func TestIntegration(t *testing.T) { "aliases": 4, "groups": 4, }, - preferAppend: nil, // not defined in backend config stagingFilePrefix: "testdata/upload-job-append-mode", // an empty jobRunID means that the source is not an ETL one // see Uploader.CanAppend() @@ -279,14 +144,10 @@ func TestIntegration(t *testing.T) { useSameUserID: true, }, { - name: "Append Users", - writeKey: writeKey, + name: "Append Users", tables: []string{ "identifies", "users", "tracks", "product_track", "pages", "screens", "aliases", "groups", }, - schema: namespace, - sourceID: sourceID, - destinationID: destinationID, warehouseEventsMap: whth.EventsCountMap{ // In the first sync we get 4 events for each table, 1 for users "identifies": 4, @@ -314,24 +175,24 @@ func TestIntegration(t *testing.T) { "aliases": 4, "groups": 4, }, - preferAppend: th.Ptr(true), + configOverride: map[string]any{ + "preferAppend": true, + }, stagingFilePrefix: "testdata/upload-job-append-mode", // we set the jobRunID to make sure the uploader says we cannot append! 
// same behaviour as redshift, see hyperverge users use case jobRunID: misc.FastUUID().String(), useSameUserID: true, - additionalEnvs: map[string]string{ - "RSERVER_WAREHOUSE_POSTGRES_SKIP_DEDUP_DESTINATION_IDS": destinationID, - "RSERVER_WAREHOUSE_POSTGRES_SKIP_COMPUTING_USER_LATEST_TRAITS": "true", + additionalEnvs: func(destinationID string) map[string]string { + return map[string]string{ + "RSERVER_WAREHOUSE_POSTGRES_SKIP_DEDUP_DESTINATION_IDS": destinationID, + "RSERVER_WAREHOUSE_POSTGRES_SKIP_COMPUTING_USER_LATEST_TRAITS": "true", + } }, }, { name: "Source Job", - writeKey: sourcesWriteKey, - schema: sourcesNamespace, tables: []string{"tracks", "google_sheet"}, - sourceID: sourcesSourceID, - destinationID: sourcesDestinationID, stagingFilesEventsMap: whth.SourcesStagingFilesEventsMap(), loadFilesEventsMap: whth.SourcesLoadFilesEventsMap(), tableUploadsEventsMap: whth.SourcesTableUploadsEventsMap(), @@ -347,10 +208,70 @@ func TestIntegration(t *testing.T) { } for _, tc := range testCases { - tc := tc - t.Run(tc.name, func(t *testing.T) { - bootstrapSvc(t, tc.additionalEnvs, tc.preferAppend) + var ( + sourceID = whutils.RandHex() + destinationID = whutils.RandHex() + writeKey = whutils.RandHex() + namespace = whth.RandSchema(destType) + ) + + destinationBuilder := backendconfigtest.NewDestinationBuilder(destType). + WithID(destinationID). + WithRevisionID(destinationID). + WithConfigOption("host", host). + WithConfigOption("database", database). + WithConfigOption("user", user). + WithConfigOption("password", password). + WithConfigOption("port", strconv.Itoa(postgresPort)). + WithConfigOption("sslMode", "disable"). + WithConfigOption("namespace", namespace). + WithConfigOption("bucketProvider", whutils.MINIO). + WithConfigOption("bucketName", bucketName). + WithConfigOption("accessKeyID", accessKeyID). + WithConfigOption("secretAccessKey", secretAccessKey). + WithConfigOption("useSSL", false). + WithConfigOption("endPoint", minioEndpoint). + WithConfigOption("useRudderStorage", false). + WithConfigOption("syncFrequency", "30") + for k, v := range tc.configOverride { + destinationBuilder = destinationBuilder.WithConfigOption(k, v) + } + + workspaceConfig := backendconfigtest.NewConfigBuilder(). + WithSource( + backendconfigtest.NewSourceBuilder(). + WithID(sourceID). + WithWriteKey(writeKey). + WithWorkspaceID(workspaceID). + WithConnection(destinationBuilder.Build()). + Build(), + ). + WithWorkspaceID(workspaceID). 
+ Build() + + t.Setenv("RSERVER_WAREHOUSE_POSTGRES_MAX_PARALLEL_LOADS", "8") + t.Setenv("RSERVER_WAREHOUSE_POSTGRES_SKIP_COMPUTING_USER_LATEST_TRAITS_WORKSPACE_IDS", workspaceID) + t.Setenv("RSERVER_WAREHOUSE_POSTGRES_ENABLE_SQLSTATEMENT_EXECUTION_PLAN_WORKSPACE_IDS", workspaceID) + t.Setenv("RSERVER_WAREHOUSE_POSTGRES_ENABLE_DELETE_BY_JOBS", "true") + t.Setenv("RSERVER_WAREHOUSE_POSTGRES_ENABLE_DELETE_BY_JOBS", "true") + if tc.additionalEnvs != nil { + for envKey, envValue := range tc.additionalEnvs(destinationID) { + t.Setenv(envKey, envValue) + } + } + + whth.BootstrapSvc(t, workspaceConfig, httpPort, jobsDBPort) + + dsn := fmt.Sprintf("postgres://%s:%s@%s:%s/%s?sslmode=disable", + user, password, host, strconv.Itoa(postgresPort), database, + ) + db, err := sql.Open("postgres", dsn) + require.NoError(t, err) + require.NoError(t, db.Ping()) + t.Cleanup(func() { + _ = db.Close() + }) sqlClient := &client.Client{ SQL: db, @@ -358,7 +279,7 @@ func TestIntegration(t *testing.T) { } conf := map[string]any{ - "bucketProvider": "MINIO", + "bucketProvider": whutils.MINIO, "bucketName": bucketName, "accessKeyID": accessKeyID, "secretAccessKey": secretAccessKey, @@ -369,11 +290,11 @@ func TestIntegration(t *testing.T) { t.Log("verifying test case 1") ts1 := whth.TestConfig{ - WriteKey: tc.writeKey, - Schema: tc.schema, + WriteKey: writeKey, + Schema: namespace, Tables: tc.tables, - SourceID: tc.sourceID, - DestinationID: tc.destinationID, + SourceID: sourceID, + DestinationID: destinationID, StagingFilesEventsMap: tc.stagingFilesEventsMap, LoadFilesEventsMap: tc.loadFilesEventsMap, TableUploadsEventsMap: tc.tableUploadsEventsMap, @@ -393,11 +314,11 @@ func TestIntegration(t *testing.T) { t.Log("verifying test case 2") ts2 := whth.TestConfig{ - WriteKey: tc.writeKey, - Schema: tc.schema, + WriteKey: writeKey, + Schema: namespace, Tables: tc.tables, - SourceID: tc.sourceID, - DestinationID: tc.destinationID, + SourceID: sourceID, + DestinationID: destinationID, StagingFilesEventsMap: tc.stagingFilesEventsMap, LoadFilesEventsMap: tc.loadFilesEventsMap, TableUploadsEventsMap: tc.tableUploadsEventsMap, @@ -422,37 +343,31 @@ func TestIntegration(t *testing.T) { } }) - t.Run("Events flow with ssh tunnel", func(t *testing.T) { - bootstrapSvc(t, nil, nil) + t.Run("Events flow with SSH Tunnel", func(t *testing.T) { + httpPort, err := kithelper.GetFreePort() + require.NoError(t, err) - dsn := fmt.Sprintf("postgres://%s:%s@%s:%s/%s?sslmode=disable", - tunnelledUser, - tunnelledPassword, - tunnelledHost, - tunnelledPort, - tunnelledDatabase, - ) - tunnelInfo := &tunnelling.TunnelInfo{ - Config: map[string]interface{}{ - "sshUser": tunnelledSSHUser, - "sshPort": strconv.Itoa(sshPort), - "sshHost": tunnelledSSHHost, - "sshPrivateKey": strings.ReplaceAll(tunnelledPrivateKey, "\\n", "\n"), - }, - } + c := testcompose.New(t, compose.FilePaths([]string{"testdata/docker-compose.ssh-server.yml", "../testdata/docker-compose.jobsdb.yml", "../testdata/docker-compose.minio.yml"})) + c.Start(context.Background()) - db, err := tunnelling.Connect(dsn, tunnelInfo.Config) - require.NoError(t, err) - require.NoError(t, db.Ping()) + workspaceID := whutils.RandHex() + jobsDBPort := c.Port("jobsDb", 5432) + sshPort := c.Port("ssh-server", 2222) + minioEndpoint := fmt.Sprintf("localhost:%d", c.Port("minio", 9000)) + + tunnelledHost := "db-private-postgres" + tunnelledDatabase := "postgres" + tunnelledPassword := "postgres" + tunnelledUser := "postgres" + tunnelledPort := "5432" + tunnelledSSHUser := "rudderstack" + tunnelledSSHHost 
:= "localhost" + tunnelledPrivateKey := "-----BEGIN OPENSSH PRIVATE KEY-----\\nb3BlbnNzaC1rZXktdjEAAAAABG5vbmUAAAAEbm9uZQAAAAAAAAABAAABlwAAAAdzc2gtcn\\nNhAAAAAwEAAQAAAYEA0f/mqkkZ3c9qw8MTz5FoEO3PGecO/dtUFfJ4g1UBu9E7hi/pyVYY\\nfLfdsd5bqA2pXdU0ROymyVe683I1VzJcihUtwB1eQxP1mUhmoo0ixK0IUUGm4PRieCGv+r\\n0/gMvaYbVGUPCi5tAUVh02vZB7p2cTIaz872lvCnRhYbhGUHSbhNSSQOjnCtZfjuZZnE0l\\nPKjWV/wbJ7Pvoc/FZMlWOqL1AjAKuwFH5zs1RMrPDDv5PCZksq4a7DDxziEdq39jvA3sOm\\npQXvzBBBLBOzu7rM3/MPJb6dvAGJcYxkptfL4YXTscIMINr0g24cn+Thvt9yqA93rkb9RB\\nkw6RIEwMlQKqserA+pfsaoW0SkvnlDKzS1DLwXioL4Uc1Jpr/9jTMEfR+W7v7gJPB1JDnV\\ngen5FBfiMqbsG1amUS+mjgNfC8I00tR+CUHxpqUWANtcWTinhSnLJ2skj/2QnciPHkHurR\\nEKyEwCVecgn+xVKyRgVDCGsJ+QnAdn51+i/kO3nvAAAFqENNbN9DTWzfAAAAB3NzaC1yc2\\nEAAAGBANH/5qpJGd3PasPDE8+RaBDtzxnnDv3bVBXyeINVAbvRO4Yv6clWGHy33bHeW6gN\\nqV3VNETspslXuvNyNVcyXIoVLcAdXkMT9ZlIZqKNIsStCFFBpuD0Ynghr/q9P4DL2mG1Rl\\nDwoubQFFYdNr2Qe6dnEyGs/O9pbwp0YWG4RlB0m4TUkkDo5wrWX47mWZxNJTyo1lf8Gyez\\n76HPxWTJVjqi9QIwCrsBR+c7NUTKzww7+TwmZLKuGuww8c4hHat/Y7wN7DpqUF78wQQSwT\\ns7u6zN/zDyW+nbwBiXGMZKbXy+GF07HCDCDa9INuHJ/k4b7fcqgPd65G/UQZMOkSBMDJUC\\nqrHqwPqX7GqFtEpL55Qys0tQy8F4qC+FHNSaa//Y0zBH0flu7+4CTwdSQ51YHp+RQX4jKm\\n7BtWplEvpo4DXwvCNNLUfglB8aalFgDbXFk4p4UpyydrJI/9kJ3Ijx5B7q0RCshMAlXnIJ\\n/sVSskYFQwhrCfkJwHZ+dfov5Dt57wAAAAMBAAEAAAGAd9pxr+ag2LO0353LBMCcgGz5sn\\nLpX4F6cDw/A9XUc3lrW56k88AroaLe6NFbxoJlk6RHfL8EQg3MKX2Za/bWUgjcX7VjQy11\\nEtL7oPKkUVPgV1/8+o8AVEgFxDmWsM+oB/QJ+dAdaVaBBNUPlQmNSXHOvX2ZrpqiQXlCyx\\n79IpYq3JjmEB3dH5ZSW6CkrExrYD+MdhLw/Kv5rISEyI0Qpc6zv1fkB+8nNpXYRTbrDLR9\\n/xJ6jnBH9V3J5DeKU4MUQ39nrAp6iviyWydB973+MOygpy41fXO6hHyVZ2aSCysn1t6J/K\\nQdeEjqAOI/5CbdtiFGp06et799EFyzPItW0FKetW1UTOL2YHqdb+Q9sNjiNlUSzgxMbJWJ\\nRGO6g9B1mJsHl5mJZUiHQPsG/wgBER8VOP4bLOEB6gzVO2GE9HTJTOh5C+eEfrl52wPfXj\\nTqjtWAnhssxtgmWjkS0ibi+u1KMVXKHfaiqJ7nH0jMx+eu1RpMvuR8JqkU8qdMMGChAAAA\\nwHkQMfpCnjNAo6sllEB5FwjEdTBBOt7gu6nLQ2O3uGv0KNEEZ/BWJLQ5fKOfBtDHO+kl+5\\nQoxc0cE7cg64CyBF3+VjzrEzuX5Tuh4NwrsjT4vTTHhCIbIynxEPmKzvIyCMuglqd/nhu9\\n6CXhghuTg8NrC7lY+cImiBfhxE32zqNITlpHW7exr95Gz1sML2TRJqxDN93oUFfrEuInx8\\nHpXXnvMQxPRhcp9nDMU9/ahUamMabQqVVMwKDi8n3sPPzTiAAAAMEA+/hm3X/yNotAtMAH\\ny11parKQwPgEF4HYkSE0bEe+2MPJmEk4M4PGmmt/MQC5N5dXdUGxiQeVMR+Sw0kN9qZjM6\\nSIz0YHQFMsxVmUMKFpAh4UI0GlsW49jSpVXs34Fg95AfhZOYZmOcGcYosp0huCeRlpLeIH\\n7Vv2bkfQaic3uNaVPg7+cXg7zdY6tZlzwa/4Fj0udfTjGQJOPSzIihdMLHnV81rZ2cUOZq\\nMSk6b02aMpVB4TV0l1w4j2mlF2eGD9AAAAwQDVW6p2VXKuPR7SgGGQgHXpAQCFZPGLYd8K\\nduRaCbxKJXzUnZBn53OX5fuLlFhmRmAMXE6ztHPN1/5JjwILn+O49qel1uUvzU8TaWioq7\\nAre3SJR2ZucR4AKUvzUHGP3GWW96xPN8lq+rgb0th1eOSU2aVkaIdeTJhV1iPfaUUf+15S\\nYcJlSHLGgeqkok+VfuudZ73f3RFFhjoe1oAjlPB4leeMsBD9UBLx2U3xAevnfkecF4Lm83\\n4sVswWATSFAFsAAAAsYWJoaW1hbnl1YmFiYmFyQEFiaGltYW55dXMtTWFjQm9vay1Qcm8u\\nbG9jYWwBAgMEBQYH\\n-----END OPENSSH PRIVATE KEY-----" jobsDB := whth.JobsDB(t, jobsDBPort) testcases := []struct { name string - writeKey string - schema string - sourceID string - destinationID string tables []string stagingFilesEventsMap whth.EventsCountMap loadFilesEventsMap whth.EventsCountMap @@ -461,29 +376,93 @@ func TestIntegration(t *testing.T) { stagingFilePrefix string }{ { - name: "upload job through ssh tunnelling", - writeKey: tunnelledWriteKey, - schema: tunnelledNamespace, + name: "upload job through ssh tunnelling", tables: []string{ "identifies", "users", "tracks", "product_track", "pages", "screens", "aliases", "groups", }, - sourceID: tunnelledSourceID, - destinationID: tunnelledDestinationID, stagingFilePrefix: "testdata/upload-ssh-job", }, } for _, tc := range testcases { - tc := tc - t.Run(tc.name, func(t 
*testing.T) { + var ( + sourceID = whutils.RandHex() + destinationID = whutils.RandHex() + writeKey = whutils.RandHex() + namespace = whth.RandSchema(destType) + ) + + destinationBuilder := backendconfigtest.NewDestinationBuilder(destType). + WithID(destinationID). + WithRevisionID(destinationID). + WithConfigOption("host", tunnelledHost). + WithConfigOption("database", tunnelledDatabase). + WithConfigOption("user", tunnelledUser). + WithConfigOption("password", tunnelledPassword). + WithConfigOption("port", tunnelledPort). + WithConfigOption("sslMode", "disable"). + WithConfigOption("namespace", namespace). + WithConfigOption("bucketProvider", whutils.MINIO). + WithConfigOption("bucketName", bucketName). + WithConfigOption("accessKeyID", accessKeyID). + WithConfigOption("secretAccessKey", secretAccessKey). + WithConfigOption("useSSH", true). + WithConfigOption("useSSL", false). + WithConfigOption("endPoint", minioEndpoint). + WithConfigOption("useRudderStorage", false). + WithConfigOption("syncFrequency", "30"). + WithConfigOption("sshUser", tunnelledSSHUser). + WithConfigOption("sshHost", tunnelledSSHHost). + WithConfigOption("sshPort", strconv.Itoa(sshPort)). + WithConfigOption("sshPrivateKey", strings.ReplaceAll(tunnelledPrivateKey, "\\n", "\n")) + + workspaceConfig := backendconfigtest.NewConfigBuilder(). + WithSource( + backendconfigtest.NewSourceBuilder(). + WithID(sourceID). + WithWriteKey(writeKey). + WithWorkspaceID(workspaceID). + WithConnection(destinationBuilder.Build()). + Build(), + ). + WithWorkspaceID(workspaceID). + Build() + + t.Setenv("RSERVER_WAREHOUSE_POSTGRES_MAX_PARALLEL_LOADS", "8") + t.Setenv("RSERVER_WAREHOUSE_POSTGRES_SKIP_COMPUTING_USER_LATEST_TRAITS_WORKSPACE_IDS", workspaceID) + t.Setenv("RSERVER_WAREHOUSE_POSTGRES_ENABLE_SQLSTATEMENT_EXECUTION_PLAN_WORKSPACE_IDS", workspaceID) + t.Setenv("RSERVER_WAREHOUSE_POSTGRES_ENABLE_DELETE_BY_JOBS", "true") + t.Setenv("RSERVER_WAREHOUSE_POSTGRES_ENABLE_DELETE_BY_JOBS", "true") + + whth.BootstrapSvc(t, workspaceConfig, httpPort, jobsDBPort) + + dsn := fmt.Sprintf("postgres://%s:%s@%s:%s/%s?sslmode=disable", + tunnelledUser, tunnelledPassword, tunnelledHost, tunnelledPort, tunnelledDatabase, + ) + tunnelInfo := &tunnelling.TunnelInfo{ + Config: map[string]interface{}{ + "sshUser": tunnelledSSHUser, + "sshPort": strconv.Itoa(sshPort), + "sshHost": tunnelledSSHHost, + "sshPrivateKey": strings.ReplaceAll(tunnelledPrivateKey, "\\n", "\n"), + }, + } + + db, err := tunnelling.Connect(dsn, tunnelInfo.Config) + require.NoError(t, err) + require.NoError(t, db.Ping()) + t.Cleanup(func() { + _ = db.Close() + }) + sqlClient := &client.Client{ SQL: db, Type: client.SQLClient, } conf := map[string]interface{}{ - "bucketProvider": "MINIO", + "bucketProvider": whutils.MINIO, "bucketName": bucketName, "accessKeyID": accessKeyID, "secretAccessKey": secretAccessKey, @@ -494,10 +473,10 @@ func TestIntegration(t *testing.T) { t.Log("verifying test case 1") ts1 := whth.TestConfig{ - WriteKey: tc.writeKey, - Schema: tc.schema, - SourceID: tc.sourceID, - DestinationID: tc.destinationID, + WriteKey: writeKey, + Schema: namespace, + SourceID: sourceID, + DestinationID: destinationID, Tables: tc.tables, StagingFilesEventsMap: tc.stagingFilesEventsMap, LoadFilesEventsMap: tc.loadFilesEventsMap, @@ -518,10 +497,10 @@ func TestIntegration(t *testing.T) { t.Log("verifying test case 2") ts2 := whth.TestConfig{ - WriteKey: tc.writeKey, - Schema: tc.schema, - SourceID: tc.sourceID, - DestinationID: tc.destinationID, + WriteKey: writeKey, + Schema: 
namespace, + SourceID: sourceID, + DestinationID: destinationID, Tables: tc.tables, StagingFilesEventsMap: tc.stagingFilesEventsMap, LoadFilesEventsMap: tc.loadFilesEventsMap, @@ -544,8 +523,14 @@ func TestIntegration(t *testing.T) { }) t.Run("Validations", func(t *testing.T) { + c := testcompose.New(t, compose.FilePaths([]string{"testdata/docker-compose.postgres.yml", "../testdata/docker-compose.minio.yml"})) + c.Start(context.Background()) + + postgresPort := c.Port("postgres", 5432) + minioEndpoint := fmt.Sprintf("localhost:%d", c.Port("minio", 9000)) + dest := backendconfig.DestinationT{ - ID: destinationID, + ID: "test_destination_id", Config: map[string]interface{}{ "host": host, "database": database, @@ -554,7 +539,7 @@ func TestIntegration(t *testing.T) { "port": strconv.Itoa(postgresPort), "sslMode": "disable", "namespace": "", - "bucketProvider": "MINIO", + "bucketProvider": whutils.MINIO, "bucketName": bucketName, "accessKeyID": accessKeyID, "secretAccessKey": secretAccessKey, @@ -576,11 +561,11 @@ func TestIntegration(t *testing.T) { }) t.Run("Load Table", func(t *testing.T) { - const ( - sourceID = "test_source_id" - destinationID = "test_destination_id" - workspaceID = "test_workspace_id" - ) + c := testcompose.New(t, compose.FilePaths([]string{"testdata/docker-compose.postgres.yml", "../testdata/docker-compose.minio.yml"})) + c.Start(context.Background()) + + postgresPort := c.Port("postgres", 5432) + minioEndpoint := fmt.Sprintf("localhost:%d", c.Port("minio", 9000)) namespace := whth.RandSchema(destType) @@ -610,10 +595,10 @@ func TestIntegration(t *testing.T) { warehouse := model.Warehouse{ Source: backendconfig.SourceT{ - ID: sourceID, + ID: "test_source_id", }, Destination: backendconfig.DestinationT{ - ID: destinationID, + ID: "test_destination_id", DestinationDefinition: backendconfig.DestinationDefinitionT{ Name: destType, }, @@ -625,7 +610,7 @@ func TestIntegration(t *testing.T) { "port": strconv.Itoa(postgresPort), "sslMode": "disable", "namespace": "", - "bucketProvider": "MINIO", + "bucketProvider": whutils.MINIO, "bucketName": bucketName, "accessKeyID": accessKeyID, "secretAccessKey": secretAccessKey, @@ -635,12 +620,12 @@ func TestIntegration(t *testing.T) { "useRudderStorage": false, }, }, - WorkspaceID: workspaceID, + WorkspaceID: "test_workspace_id", Namespace: namespace, } fm, err := filemanager.New(&filemanager.Settings{ - Provider: warehouseutils.MINIO, + Provider: whutils.MINIO, Config: map[string]any{ "bucketName": bucketName, "accessKeyID": accessKeyID, @@ -651,7 +636,7 @@ func TestIntegration(t *testing.T) { "disableSSL": true, "region": region, "enableSSE": false, - "bucketProvider": warehouseutils.MINIO, + "bucketProvider": whutils.MINIO, }, }) require.NoError(t, err) @@ -662,7 +647,7 @@ func TestIntegration(t *testing.T) { uploadOutput := whth.UploadLoadFile(t, fm, "../testdata/load.csv.gz", tableName) - loadFiles := []warehouseutils.LoadFile{{Location: uploadOutput.Location}} + loadFiles := []whutils.LoadFile{{Location: uploadOutput.Location}} mockUploader := mockUploader(t, loadFiles, tableName, schemaInUpload, schemaInWarehouse) pg := postgres.New(config.New(), logger.NOP, stats.NOP) @@ -679,7 +664,7 @@ func TestIntegration(t *testing.T) { uploadOutput := whth.UploadLoadFile(t, fm, "../testdata/load.csv.gz", tableName) - loadFiles := []warehouseutils.LoadFile{{Location: uploadOutput.Location}} + loadFiles := []whutils.LoadFile{{Location: uploadOutput.Location}} mockUploader := mockUploader(t, loadFiles, tableName, schemaInUpload, 
schemaInWarehouse) pg := postgres.New(config.New(), logger.NOP, stats.NOP) @@ -699,11 +684,11 @@ func TestIntegration(t *testing.T) { tableName := "merge_without_dedup_test_table" uploadOutput := whth.UploadLoadFile(t, fm, "../testdata/load.csv.gz", tableName) - loadFiles := []warehouseutils.LoadFile{{Location: uploadOutput.Location}} + loadFiles := []whutils.LoadFile{{Location: uploadOutput.Location}} mockUploader := mockUploader(t, loadFiles, tableName, schemaInUpload, schemaInWarehouse) c := config.New() - c.Set("Warehouse.postgres.EnableSQLStatementExecutionPlanWorkspaceIDs", workspaceID) + c.Set("Warehouse.postgres.EnableSQLStatementExecutionPlanWorkspaceIDs", "test_workspace_id") appendWarehouse := th.Clone(t, warehouse) appendWarehouse.Destination.Config[model.PreferAppendSetting.String()] = true @@ -754,11 +739,11 @@ func TestIntegration(t *testing.T) { tableName := "merge_with_dedup_test_table" uploadOutput := whth.UploadLoadFile(t, fm, "../testdata/dedup.csv.gz", tableName) - loadFiles := []warehouseutils.LoadFile{{Location: uploadOutput.Location}} + loadFiles := []whutils.LoadFile{{Location: uploadOutput.Location}} mockUploader := mockUploader(t, loadFiles, tableName, schemaInUpload, schemaInWarehouse) c := config.New() - c.Set("Warehouse.postgres.EnableSQLStatementExecutionPlanWorkspaceIDs", workspaceID) + c.Set("Warehouse.postgres.EnableSQLStatementExecutionPlanWorkspaceIDs", "test_workspace_id") pg := postgres.New(config.New(), logger.NOP, stats.NOP) err := pg.Setup(ctx, warehouse, mockUploader) @@ -808,11 +793,11 @@ func TestIntegration(t *testing.T) { uploadOutput := whth.UploadLoadFile(t, fm, "../testdata/load.csv.gz", tableName) - loadFiles := []warehouseutils.LoadFile{{Location: uploadOutput.Location}} + loadFiles := []whutils.LoadFile{{Location: uploadOutput.Location}} mockUploader := mockUploader(t, loadFiles, tableName, schemaInUpload, schemaInWarehouse) c := config.New() - c.Set("Warehouse.postgres.skipDedupDestinationIDs", destinationID) + c.Set("Warehouse.postgres.skipDedupDestinationIDs", "test_destination_id") appendWarehouse := th.Clone(t, warehouse) appendWarehouse.Destination.Config[model.PreferAppendSetting.String()] = true @@ -862,7 +847,7 @@ func TestIntegration(t *testing.T) { ctx := context.Background() tableName := "load_file_not_exists_test_table" - loadFiles := []warehouseutils.LoadFile{{ + loadFiles := []whutils.LoadFile{{ Location: "http://localhost:1234/testbucket/rudder-warehouse-load-objects/load_file_not_exists_test_table/test_source_id/f31af97e-03e8-46d0-8a1a-1786cb85b22c-load_file_not_exists_test_table/load.csv.gz", }} mockUploader := mockUploader(t, loadFiles, tableName, schemaInUpload, schemaInWarehouse) @@ -887,7 +872,7 @@ func TestIntegration(t *testing.T) { uploadOutput := whth.UploadLoadFile(t, fm, "../testdata/mismatch-columns.csv.gz", tableName) - loadFiles := []warehouseutils.LoadFile{{Location: uploadOutput.Location}} + loadFiles := []whutils.LoadFile{{Location: uploadOutput.Location}} mockUploader := mockUploader(t, loadFiles, tableName, schemaInUpload, schemaInWarehouse) pg := postgres.New(config.New(), logger.NOP, stats.NOP) @@ -910,7 +895,7 @@ func TestIntegration(t *testing.T) { uploadOutput := whth.UploadLoadFile(t, fm, "../testdata/mismatch-schema.csv.gz", tableName) - loadFiles := []warehouseutils.LoadFile{{Location: uploadOutput.Location}} + loadFiles := []whutils.LoadFile{{Location: uploadOutput.Location}} mockUploader := mockUploader(t, loadFiles, tableName, schemaInUpload, schemaInWarehouse) pg := 
postgres.New(config.New(), logger.NOP, stats.NOP) @@ -929,12 +914,12 @@ func TestIntegration(t *testing.T) { }) t.Run("discards", func(t *testing.T) { ctx := context.Background() - tableName := warehouseutils.DiscardsTable + tableName := whutils.DiscardsTable uploadOutput := whth.UploadLoadFile(t, fm, "../testdata/discards.csv.gz", tableName) - loadFiles := []warehouseutils.LoadFile{{Location: uploadOutput.Location}} - mockUploader := mockUploader(t, loadFiles, tableName, warehouseutils.DiscardsSchema, warehouseutils.DiscardsSchema) + loadFiles := []whutils.LoadFile{{Location: uploadOutput.Location}} + mockUploader := mockUploader(t, loadFiles, tableName, whutils.DiscardsSchema, whutils.DiscardsSchema) pg := postgres.New(config.New(), logger.NOP, stats.NOP) err := pg.Setup(ctx, warehouse, mockUploader) @@ -943,7 +928,7 @@ func TestIntegration(t *testing.T) { err = pg.CreateSchema(ctx) require.NoError(t, err) - err = pg.CreateTable(ctx, tableName, warehouseutils.DiscardsSchema) + err = pg.CreateTable(ctx, tableName, whutils.DiscardsSchema) require.NoError(t, err) loadTableStat, err := pg.LoadTable(ctx, tableName) @@ -973,19 +958,21 @@ func TestIntegration(t *testing.T) { }) t.Run("Logical Replication", func(t *testing.T) { - const ( - namespace = "test_namespace" - sourceID = "test_source_id" - destType = "test_dest_type" - workspaceID = "test_workspace_id" - ) + c := testcompose.New(t, compose.FilePaths([]string{"testdata/docker-compose.replication.yml", "../testdata/docker-compose.minio.yml"})) + c.Start(context.Background()) + + primaryDBPort := c.Port("primary", 5432) + standbyDBPort := c.Port("standby", 5432) + minioEndpoint := fmt.Sprintf("localhost:%d", c.Port("minio", 9000)) + + namespace := whth.RandSchema(destType) warehouse := model.Warehouse{ Source: backendconfig.SourceT{ - ID: sourceID, + ID: "test_source_id", }, Destination: backendconfig.DestinationT{ - ID: destinationID, + ID: "test_destination_id", DestinationDefinition: backendconfig.DestinationDefinitionT{ Name: destType, }, @@ -997,7 +984,7 @@ func TestIntegration(t *testing.T) { "port": strconv.Itoa(primaryDBPort), "sslMode": "disable", "namespace": "", - "bucketProvider": "MINIO", + "bucketProvider": whutils.MINIO, "bucketName": bucketName, "accessKeyID": accessKeyID, "secretAccessKey": secretAccessKey, @@ -1007,7 +994,7 @@ func TestIntegration(t *testing.T) { "useRudderStorage": false, }, }, - WorkspaceID: workspaceID, + WorkspaceID: "test_workspace_id", Namespace: namespace, } @@ -1017,7 +1004,7 @@ func TestIntegration(t *testing.T) { standByWarehouse.Destination.Config["port"] = strconv.Itoa(standbyDBPort) fm, err := filemanager.New(&filemanager.Settings{ - Provider: warehouseutils.MINIO, + Provider: whutils.MINIO, Config: map[string]any{ "bucketName": bucketName, "accessKeyID": accessKeyID, @@ -1028,21 +1015,19 @@ func TestIntegration(t *testing.T) { "disableSSL": true, "region": region, "enableSSE": false, - "bucketProvider": warehouseutils.MINIO, + "bucketProvider": whutils.MINIO, }, }) require.NoError(t, err) - primaryDSN := fmt.Sprintf( - "postgres://%s:%s@%s:%s/%s?sslmode=disable", - "rudder", "rudder-password", "localhost", strconv.Itoa(primaryDBPort), "rudderdb", + primaryDSN := fmt.Sprintf("postgres://%s:%s@%s:%s/%s?sslmode=disable", + user, password, host, strconv.Itoa(primaryDBPort), database, ) primaryDB, err := sql.Open("postgres", primaryDSN) require.NoError(t, err) require.NoError(t, primaryDB.Ping()) - standByDSN := fmt.Sprintf( - "postgres://%s:%s@%s:%s/%s?sslmode=disable", - "rudder", 
"rudder-password", "localhost", strconv.Itoa(standbyDBPort), "rudderdb", + standByDSN := fmt.Sprintf("postgres://%s:%s@%s:%s/%s?sslmode=disable", + user, password, host, strconv.Itoa(standbyDBPort), database, ) standByDB, err := sql.Open("postgres", standByDSN) require.NoError(t, err) @@ -1065,7 +1050,7 @@ func TestIntegration(t *testing.T) { uploadOutput := whth.UploadLoadFile(t, fm, "../testdata/load.csv.gz", tableName) - loadFiles := []warehouseutils.LoadFile{{Location: uploadOutput.Location}} + loadFiles := []whutils.LoadFile{{Location: uploadOutput.Location}} mockUploader := mockUploader(t, loadFiles, tableName, replicationTableSchema, replicationTableSchema) primaryPG := postgres.New(config.New(), logger.NOP, stats.NOP) @@ -1158,59 +1143,59 @@ func TestIntegration(t *testing.T) { "received_at": "datetime", } - usersUploadOutput := whth.UploadLoadFile(t, fm, "testdata/users.csv.gz", warehouseutils.UsersTable) - identifiesUploadOutput := whth.UploadLoadFile(t, fm, "testdata/identifies.csv.gz", warehouseutils.IdentifiesTable) + usersUploadOutput := whth.UploadLoadFile(t, fm, "testdata/users.csv.gz", whutils.UsersTable) + identifiesUploadOutput := whth.UploadLoadFile(t, fm, "testdata/identifies.csv.gz", whutils.IdentifiesTable) ctrl := gomock.NewController(t) mockUploader := mockuploader.NewMockUploader(ctrl) mockUploader.EXPECT().UseRudderStorage().Return(false).AnyTimes() - mockUploader.EXPECT().GetLoadFilesMetadata(gomock.Any(), warehouseutils.GetLoadFilesOptions{Table: warehouseutils.UsersTable}).Return([]warehouseutils.LoadFile{{Location: usersUploadOutput.Location}}, nil).AnyTimes() - mockUploader.EXPECT().GetLoadFilesMetadata(gomock.Any(), warehouseutils.GetLoadFilesOptions{Table: warehouseutils.IdentifiesTable}).Return([]warehouseutils.LoadFile{{Location: identifiesUploadOutput.Location}}, nil).AnyTimes() - mockUploader.EXPECT().GetTableSchemaInUpload(warehouseutils.UsersTable).Return(usersTableSchema).AnyTimes() - mockUploader.EXPECT().GetTableSchemaInUpload(warehouseutils.IdentifiesTable).Return(IdentifiesTableSchema).AnyTimes() - mockUploader.EXPECT().GetTableSchemaInWarehouse(warehouseutils.UsersTable).Return(usersTableSchema).AnyTimes() - mockUploader.EXPECT().GetTableSchemaInWarehouse(warehouseutils.IdentifiesTable).Return(IdentifiesTableSchema).AnyTimes() + mockUploader.EXPECT().GetLoadFilesMetadata(gomock.Any(), whutils.GetLoadFilesOptions{Table: whutils.UsersTable}).Return([]whutils.LoadFile{{Location: usersUploadOutput.Location}}, nil).AnyTimes() + mockUploader.EXPECT().GetLoadFilesMetadata(gomock.Any(), whutils.GetLoadFilesOptions{Table: whutils.IdentifiesTable}).Return([]whutils.LoadFile{{Location: identifiesUploadOutput.Location}}, nil).AnyTimes() + mockUploader.EXPECT().GetTableSchemaInUpload(whutils.UsersTable).Return(usersTableSchema).AnyTimes() + mockUploader.EXPECT().GetTableSchemaInUpload(whutils.IdentifiesTable).Return(IdentifiesTableSchema).AnyTimes() + mockUploader.EXPECT().GetTableSchemaInWarehouse(whutils.UsersTable).Return(usersTableSchema).AnyTimes() + mockUploader.EXPECT().GetTableSchemaInWarehouse(whutils.IdentifiesTable).Return(IdentifiesTableSchema).AnyTimes() mockUploader.EXPECT().CanAppend().Return(true).AnyTimes() primaryPG := postgres.New(config.New(), logger.NOP, stats.NOP) require.NoError(t, primaryPG.Setup(ctx, primaryWarehouse, mockUploader)) require.NoError(t, primaryPG.CreateSchema(ctx)) - require.NoError(t, primaryPG.CreateTable(ctx, warehouseutils.IdentifiesTable, IdentifiesTableSchema)) - require.NoError(t, primaryPG.CreateTable(ctx, 
warehouseutils.UsersTable, usersTableSchema)) + require.NoError(t, primaryPG.CreateTable(ctx, whutils.IdentifiesTable, IdentifiesTableSchema)) + require.NoError(t, primaryPG.CreateTable(ctx, whutils.UsersTable, usersTableSchema)) standByPG := postgres.New(config.New(), logger.NOP, stats.NOP) require.NoError(t, standByPG.Setup(ctx, standByWarehouse, mockUploader)) require.NoError(t, standByPG.CreateSchema(ctx)) - require.NoError(t, standByPG.CreateTable(ctx, warehouseutils.IdentifiesTable, IdentifiesTableSchema)) - require.NoError(t, standByPG.CreateTable(ctx, warehouseutils.UsersTable, usersTableSchema)) + require.NoError(t, standByPG.CreateTable(ctx, whutils.IdentifiesTable, IdentifiesTableSchema)) + require.NoError(t, standByPG.CreateTable(ctx, whutils.UsersTable, usersTableSchema)) // Creating publication and subscription - _, err = primaryDB.ExecContext(ctx, fmt.Sprintf("CREATE PUBLICATION users_publication FOR TABLE %[1]s.%[2]s, %[1]s.%[3]s;", namespace, warehouseutils.IdentifiesTable, warehouseutils.UsersTable)) + _, err = primaryDB.ExecContext(ctx, fmt.Sprintf("CREATE PUBLICATION users_publication FOR TABLE %[1]s.%[2]s, %[1]s.%[3]s;", namespace, whutils.IdentifiesTable, whutils.UsersTable)) require.NoError(t, err) _, err = standByDB.ExecContext(ctx, fmt.Sprintf("CREATE SUBSCRIPTION users_subscription CONNECTION 'host=primary port=5432 user=%s password=%s dbname=%s' PUBLICATION users_publication;", user, password, database)) require.NoError(t, err) // Adding primary key to identifies table - _, err = primaryDB.ExecContext(ctx, fmt.Sprintf(`ALTER TABLE %s.%s ADD PRIMARY KEY ("id");`, namespace, warehouseutils.IdentifiesTable)) + _, err = primaryDB.ExecContext(ctx, fmt.Sprintf(`ALTER TABLE %s.%s ADD PRIMARY KEY ("id");`, namespace, whutils.IdentifiesTable)) require.NoError(t, err) // Loading data should fail for the users table because of the missing primary key errorsMap := primaryPG.LoadUserTables(ctx) - require.NoError(t, errorsMap[warehouseutils.IdentifiesTable]) + require.NoError(t, errorsMap[whutils.IdentifiesTable]) var pgErr *pq.Error - require.ErrorAs(t, errorsMap[warehouseutils.UsersTable], &pgErr) + require.ErrorAs(t, errorsMap[whutils.UsersTable], &pgErr) require.EqualValues(t, pq.ErrorCode("55000"), pgErr.Code) // Adding primary key to users table - _, err = primaryDB.ExecContext(ctx, fmt.Sprintf(`ALTER TABLE %s.%s ADD PRIMARY KEY ("id");`, namespace, warehouseutils.UsersTable)) + _, err = primaryDB.ExecContext(ctx, fmt.Sprintf(`ALTER TABLE %s.%s ADD PRIMARY KEY ("id");`, namespace, whutils.UsersTable)) require.NoError(t, err) // Loading data should work now errorsMap = primaryPG.LoadUserTables(ctx) - require.NoError(t, errorsMap[warehouseutils.IdentifiesTable]) - require.NoError(t, errorsMap[warehouseutils.UsersTable]) + require.NoError(t, errorsMap[whutils.IdentifiesTable]) + require.NoError(t, errorsMap[whutils.UsersTable]) // Checking the number of rows in both primary and standby databases - for _, tableName := range []string{warehouseutils.IdentifiesTable, warehouseutils.UsersTable} { + for _, tableName := range []string{whutils.IdentifiesTable, whutils.UsersTable} { var ( countQuery = fmt.Sprintf("SELECT COUNT(*) FROM %s.%s;", namespace, tableName) count int @@ -1252,11 +1237,11 @@ func TestIntegration(t *testing.T) { func mockUploader( t testing.TB, - loadFiles []warehouseutils.LoadFile, + loadFiles []whutils.LoadFile, tableName string, schemaInUpload model.TableSchema, schemaInWarehouse model.TableSchema, -) warehouseutils.Uploader { +) whutils.Uploader { ctrl := 
gomock.NewController(t) t.Cleanup(ctrl.Finish) diff --git a/warehouse/integrations/postgres/testdata/template.json b/warehouse/integrations/postgres/testdata/template.json deleted file mode 100644 index d8eefcfb3f8..00000000000 --- a/warehouse/integrations/postgres/testdata/template.json +++ /dev/null @@ -1,408 +0,0 @@ -{ - "enableMetrics": false, - "workspaceId": "{{.workspaceID}}", - "sources": [ - { - "config": { - "eventUpload": false, - "eventUploadTS": 1637229453729 - }, - "id": "{{.tunnelledSourceID}}", - "name": "postgres-ssh-integration", - "writeKey": "{{.tunnelledWriteKey}}", - "enabled": true, - "sourceDefinitionId": "1TW3fuvuaZqJs877OEailT17KzZ", - "createdBy": "1wLg8l6vAj2TuUUMIIBKL4nsVOT", - "workspaceId": "{{.workspaceID}}", - "deleted": false, - "createdAt": "2021-08-08T14:49:21.580Z", - "updatedAt": "2021-11-18T09:57:33.742Z", - "destinations": [ - { - "config": { - {{.preferAppend}} - "sshUser": "{{.tunnelledSSHUser}}", - "sshHost": "{{.tunnelledSSHHost}}", - "sshPort": "{{.tunnelledSSHPort}}", - "sshPrivateKey": "{{.tunnelledPrivateKey}}", - "useSSH": true, - "host": "{{.tunnelledHost}}", - "database": "{{.tunnelledDatabase}}", - "user": "{{.tunnelledUser}}", - "password": "{{.tunnelledPassword}}", - "port": "{{.tunnelledPort}}", - "sslMode": "disable", - "namespace": "{{.tunnelledNamespace}}", - "bucketProvider": "MINIO", - "bucketName": "{{.bucketName}}", - "accessKeyID": "{{.accessKeyID}}", - "secretAccessKey": "{{.secretAccessKey}}", - "useSSL": false, - "endPoint": "{{.endPoint}}", - "syncFrequency": "30", - "useRudderStorage": false - }, - "secretConfig": {}, - "id": "{{.tunnelledDestinationID}}", - "name": "postgres-demo", - "enabled": true, - "workspaceId": "{{.workspaceID}}", - "deleted": false, - "createdAt": "2021-11-18T19:28:48.030Z", - "updatedAt": "2021-11-18T19:28:48.030Z", - "revisionId": "{{.tunnelledDestinationID}}", - "transformations": [], - "destinationDefinition": { - "config": { - "destConfig": { - "defaultConfig": [ - "sshHost", - "sshPort", - "sshUser", - "sshPrivateKey", - "host", - "database", - "user", - "password", - "port", - "sslMode", - "namespace", - "bucketProvider", - "bucketName", - "accessKeyID", - "accessKey", - "accountName", - "accountKey", - "credentials", - "secretAccessKey", - "useSSL", - "containerName", - "endPoint", - "syncFrequency", - "syncStartAt", - "excludeWindow", - "useRudderStorage" - ] - }, - "secretKeys": [ - "password", - "accessKeyID", - "accessKey", - "accountKey", - "secretAccessKey", - "credentials" - ], - "excludeKeys": [], - "includeKeys": [], - "transformAt": "processor", - "transformAtV1": "processor", - "supportedSourceTypes": [ - "android", - "ios", - "web", - "unity", - "amp", - "cloud", - "reactnative", - "cloudSource", - "flutter", - "cordova" - ], - "saveDestinationResponse": true - }, - "responseRules": null, - "id": "1bJ4YC7INdkvBTzotNh0zta5jDm", - "name": "POSTGRES", - "displayName": "Postgres", - "category": "warehouse", - "createdAt": "2020-05-01T12:41:47.463Z", - "updatedAt": "2021-11-11T07:56:08.667Z" - }, - "isConnectionEnabled": true, - "isProcessorEnabled": true - } - ], - "sourceDefinition": { - "options": null, - "id": "1TW3fuvuaZqJs877OEailT17KzZ", - "name": "Javascript", - "displayName": "Javascript", - "category": null, - "createdAt": "2019-11-12T12:35:30.464Z", - "updatedAt": "2021-09-28T02:27:30.373Z" - }, - "dgSourceTrackingPlanConfig": null - }, - { - "config": { - "eventUpload": false, - "eventUploadTS": 1637229453729 - }, - "id": "{{.sourceID}}", - "name": "postgres-integration", 
- "writeKey": "{{.writeKey}}", - "enabled": true, - "sourceDefinitionId": "1TW3fuvuaZqJs877OEailT17KzZ", - "createdBy": "1wLg8l6vAj2TuUUMIIBKL4nsVOT", - "workspaceId": "{{.workspaceID}}", - "deleted": false, - "createdAt": "2021-08-08T14:49:21.580Z", - "updatedAt": "2021-11-18T09:57:33.742Z", - "destinations": [ - { - "config": { - {{.preferAppend}} - "host": "{{.host}}", - "database": "{{.database}}", - "user": "{{.user}}", - "password": "{{.password}}", - "port": "{{.port}}", - "sslMode": "disable", - "namespace": "{{.namespace}}", - "bucketProvider": "MINIO", - "bucketName": "{{.bucketName}}", - "accessKeyID": "{{.accessKeyID}}", - "secretAccessKey": "{{.secretAccessKey}}", - "useSSL": false, - "endPoint": "{{.endPoint}}", - "syncFrequency": "30", - "useRudderStorage": false - }, - "secretConfig": {}, - "id": "{{.destinationID}}", - "name": "postgres-demo", - "enabled": true, - "workspaceId": "{{.workspaceID}}", - "deleted": false, - "createdAt": "2021-11-18T19:28:48.030Z", - "updatedAt": "2021-11-18T19:28:48.030Z", - "revisionId": "{{.destinationID}}", - "transformations": [], - "destinationDefinition": { - "config": { - "destConfig": { - "defaultConfig": [ - "host", - "database", - "user", - "password", - "port", - "sslMode", - "namespace", - "bucketProvider", - "bucketName", - "accessKeyID", - "accessKey", - "accountName", - "accountKey", - "credentials", - "secretAccessKey", - "useSSL", - "containerName", - "endPoint", - "syncFrequency", - "syncStartAt", - "excludeWindow", - "useRudderStorage" - ] - }, - "secretKeys": [ - "password", - "accessKeyID", - "accessKey", - "accountKey", - "secretAccessKey", - "credentials" - ], - "excludeKeys": [], - "includeKeys": [], - "transformAt": "processor", - "transformAtV1": "processor", - "supportedSourceTypes": [ - "android", - "ios", - "web", - "unity", - "amp", - "cloud", - "reactnative", - "cloudSource", - "flutter", - "cordova" - ], - "saveDestinationResponse": true - }, - "responseRules": null, - "id": "1bJ4YC7INdkvBTzotNh0zta5jDm", - "name": "POSTGRES", - "displayName": "Postgres", - "category": "warehouse", - "createdAt": "2020-05-01T12:41:47.463Z", - "updatedAt": "2021-11-11T07:56:08.667Z" - }, - "isConnectionEnabled": true, - "isProcessorEnabled": true - } - ], - "sourceDefinition": { - "options": null, - "id": "1TW3fuvuaZqJs877OEailT17KzZ", - "name": "Javascript", - "displayName": "Javascript", - "category": null, - "createdAt": "2019-11-12T12:35:30.464Z", - "updatedAt": "2021-09-28T02:27:30.373Z" - }, - "dgSourceTrackingPlanConfig": null - }, - { - "config": { - "config": { - "row_batch_size": 200, - "credentials": { - "auth_type": "Client", - "accountId": "29hOyXzmdF9rz7yR2FTq4pohyXL" - }, - "spreadsheet_id": "1bKQpN-KkhYZd4eqUUoq3Tec6HrJzgqSc8jwVvajnpk8" - }, - "schedule": { - "type": "manual", - "every": 0, - "unit": "minutes" - }, - "prefix": "SGS5" - }, - "liveEventsConfig": {}, - "id": "{{.sourcesSourceID}}", - "name": "postgres-sources-integration", - "writeKey": "{{.sourcesWriteKey}}", - "enabled": true, - "sourceDefinitionId": "29seNpaVfhMp7YVpiBUszPOvmO1", - "createdBy": "279BPpjT6BGqKKhT5qAZuUVZa1h", - "workspaceId": "{{.workspaceID}}", - "deleted": false, - "transient": false, - "secretVersion": null, - "createdAt": "2022-08-23T00:21:18.366Z", - "updatedAt": "2022-08-23T00:21:18.366Z", - "sourceDefinition": { - "options": { - "auth": { - "provider": "Google", - "oauthRole": "google_sheets" - }, - "image": "source-google-sheets:v2", - "isBeta": true - }, - "id": "29seNpaVfhMp7YVpiBUszPOvmO1", - "name": 
"singer-google-sheets", - "displayName": "Singer Google Sheets", - "category": "singer-protocol", - "createdAt": "2022-05-30T04:53:02.188Z", - "updatedAt": "2022-05-30T04:53:02.188Z" - }, - "destinations": [ - { - "config": { - {{.preferAppend}} - "host": "{{.host}}", - "database": "{{.database}}", - "user": "{{.user}}", - "password": "{{.password}}", - "port": "{{.port}}", - "sslMode": "disable", - "namespace": "{{.sourcesNamespace}}", - "bucketProvider": "MINIO", - "bucketName": "{{.bucketName}}", - "accessKeyID": "{{.accessKeyID}}", - "secretAccessKey": "{{.secretAccessKey}}", - "useSSL": false, - "endPoint": "{{.endPoint}}", - "syncFrequency": "30", - "useRudderStorage": false - }, - "secretConfig": {}, - "id": "{{.sourcesDestinationID}}", - "name": "postgres-sources-demo", - "enabled": true, - "workspaceId": "{{.workspaceID}}", - "deleted": false, - "createdAt": "2021-11-18T19:28:48.030Z", - "updatedAt": "2021-11-18T19:28:48.030Z", - "revisionId": "{{.sourcesDestinationID}}", - "transformations": [], - "destinationDefinition": { - "config": { - "destConfig": { - "defaultConfig": [ - "host", - "database", - "user", - "password", - "port", - "sslMode", - "namespace", - "bucketProvider", - "bucketName", - "accessKeyID", - "accessKey", - "accountName", - "accountKey", - "credentials", - "secretAccessKey", - "useSSL", - "containerName", - "endPoint", - "syncFrequency", - "syncStartAt", - "excludeWindow", - "useRudderStorage" - ] - }, - "secretKeys": [ - "password", - "accessKeyID", - "accessKey", - "accountKey", - "secretAccessKey", - "credentials" - ], - "excludeKeys": [], - "includeKeys": [], - "transformAt": "processor", - "transformAtV1": "processor", - "supportedSourceTypes": [ - "android", - "ios", - "web", - "unity", - "amp", - "cloud", - "reactnative", - "cloudSource", - "flutter", - "cordova" - ], - "saveDestinationResponse": true - }, - "responseRules": null, - "id": "1bJ4YC7INdkvBTzotNh0zta5jDm", - "name": "POSTGRES", - "displayName": "Postgres", - "category": "warehouse", - "createdAt": "2020-05-01T12:41:47.463Z", - "updatedAt": "2021-11-11T07:56:08.667Z" - }, - "isConnectionEnabled": true, - "isProcessorEnabled": true - } - ], - "dgSourceTrackingPlanConfig": null - } - ], - "libraries": [ - { - "versionId": "23Uxw7QEiOg8e0KkQV8LmNfWaWh" - } - ] -} diff --git a/warehouse/integrations/redshift/redshift_test.go b/warehouse/integrations/redshift/redshift_test.go index c866bf99f09..0f01850495c 100644 --- a/warehouse/integrations/redshift/redshift_test.go +++ b/warehouse/integrations/redshift/redshift_test.go @@ -8,7 +8,6 @@ import ( "fmt" "os" "slices" - "strconv" "strings" "testing" "time" @@ -29,10 +28,8 @@ import ( "github.com/rudderlabs/rudder-go-kit/testhelper/docker/resource/postgres" backendconfig "github.com/rudderlabs/rudder-server/backend-config" - "github.com/rudderlabs/rudder-server/runner" th "github.com/rudderlabs/rudder-server/testhelper" - "github.com/rudderlabs/rudder-server/testhelper/health" - "github.com/rudderlabs/rudder-server/testhelper/workspaceConfig" + "github.com/rudderlabs/rudder-server/testhelper/backendconfigtest" "github.com/rudderlabs/rudder-server/utils/misc" "github.com/rudderlabs/rudder-server/warehouse/client" sqlmiddleware "github.com/rudderlabs/rudder-server/warehouse/integrations/middleware/sqlquerywrapper" @@ -40,7 +37,7 @@ import ( whth "github.com/rudderlabs/rudder-server/warehouse/integrations/testhelper" mockuploader "github.com/rudderlabs/rudder-server/warehouse/internal/mocks/utils" 
"github.com/rudderlabs/rudder-server/warehouse/internal/model" - warehouseutils "github.com/rudderlabs/rudder-server/warehouse/utils" + whutils "github.com/rudderlabs/rudder-server/warehouse/utils" "github.com/rudderlabs/rudder-server/warehouse/validations" ) @@ -64,7 +61,7 @@ const testKey = "REDSHIFT_INTEGRATION_TEST_CREDENTIALS" func rsTestCredentials() (*testCredentials, error) { cred, exists := os.LookupEnv(testKey) if !exists { - return nil, errors.New("redshift test credentials not found") + return nil, fmt.Errorf("missing redshift test credentials") } var credentials testCredentials @@ -80,126 +77,35 @@ func TestIntegration(t *testing.T) { t.Skip("Skipping tests. Add 'SLOW=1' env var to run test.") } if _, exists := os.LookupEnv(testKey); !exists { + if os.Getenv("FORCE_RUN_INTEGRATION_TESTS") == "true" { + t.Fatalf("%s environment variable not set", testKey) + } t.Skipf("Skipping %s as %s is not set", t.Name(), testKey) } - c := testcompose.New(t, compose.FilePaths([]string{"../testdata/docker-compose.jobsdb.yml"})) - c.Start(context.Background()) - misc.Init() validations.Init() - warehouseutils.Init() - - jobsDBPort := c.Port("jobsDb", 5432) - - httpPort, err := kithelper.GetFreePort() - require.NoError(t, err) - - workspaceID := warehouseutils.RandHex() - sourceID := warehouseutils.RandHex() - destinationID := warehouseutils.RandHex() - writeKey := warehouseutils.RandHex() - sourcesSourceID := warehouseutils.RandHex() - sourcesDestinationID := warehouseutils.RandHex() - sourcesWriteKey := warehouseutils.RandHex() - iamSourceID := warehouseutils.RandHex() - iamDestinationID := warehouseutils.RandHex() - iamWriteKey := warehouseutils.RandHex() + whutils.Init() - destType := warehouseutils.RS + destType := whutils.RS - namespace := whth.RandSchema(destType) - iamNamespace := whth.RandSchema(destType) - sourcesNamespace := whth.RandSchema(destType) - - rsTestCredentials, err := rsTestCredentials() + credentials, err := rsTestCredentials() require.NoError(t, err) - dsn := fmt.Sprintf("postgres://%s:%s@%s:%s/%s?sslmode=disable", - rsTestCredentials.UserName, - rsTestCredentials.Password, - rsTestCredentials.Host, - rsTestCredentials.Port, - rsTestCredentials.DbName, - ) - - db, err := sql.Open("postgres", dsn) - require.NoError(t, err) - require.NoError(t, db.Ping()) - - bootstrapSvc := func(t testing.TB, additionalEnvs map[string]string, preferAppend *bool) { - var preferAppendStr string - if preferAppend != nil { - preferAppendStr = fmt.Sprintf(`"preferAppend": %v,`, *preferAppend) - } - - templateConfigurations := map[string]any{ - "workspaceID": workspaceID, - "sourceID": sourceID, - "destinationID": destinationID, - "writeKey": writeKey, - "sourcesSourceID": sourcesSourceID, - "sourcesDestinationID": sourcesDestinationID, - "sourcesWriteKey": sourcesWriteKey, - "iamSourceID": iamSourceID, - "iamDestinationID": iamDestinationID, - "iamWriteKey": iamWriteKey, - "host": rsTestCredentials.Host, - "port": rsTestCredentials.Port, - "user": rsTestCredentials.UserName, - "password": rsTestCredentials.Password, - "database": rsTestCredentials.DbName, - "bucketName": rsTestCredentials.BucketName, - "accessKeyID": rsTestCredentials.AccessKeyID, - "accessKey": rsTestCredentials.AccessKey, - "namespace": namespace, - "sourcesNamespace": sourcesNamespace, - "iamNamespace": iamNamespace, - "preferAppend": preferAppendStr, - "iamUser": rsTestCredentials.IAMUserName, - "iamRoleARNForAuth": rsTestCredentials.IAMRoleARN, - "clusterID": rsTestCredentials.ClusterID, - "clusterRegion": 
rsTestCredentials.ClusterRegion, - } - workspaceConfigPath := workspaceConfig.CreateTempFile(t, "testdata/template.json", templateConfigurations) - - whth.EnhanceWithDefaultEnvs(t) - t.Setenv("JOBS_DB_PORT", strconv.Itoa(jobsDBPort)) - t.Setenv("WAREHOUSE_JOBS_DB_PORT", strconv.Itoa(jobsDBPort)) - t.Setenv("RSERVER_WAREHOUSE_WEB_PORT", strconv.Itoa(httpPort)) - t.Setenv("RSERVER_BACKEND_CONFIG_CONFIG_JSONPATH", workspaceConfigPath) - t.Setenv("RSERVER_WAREHOUSE_REDSHIFT_MAX_PARALLEL_LOADS", "8") - t.Setenv("RSERVER_WAREHOUSE_REDSHIFT_ENABLE_DELETE_BY_JOBS", "true") - t.Setenv("RSERVER_WAREHOUSE_REDSHIFT_SLOW_QUERY_THRESHOLD", "0s") - for envKey, envValue := range additionalEnvs { - t.Setenv(envKey, envValue) - } - - svcDone := make(chan struct{}) - - ctx, cancel := context.WithCancel(context.Background()) - go func() { - r := runner.New(runner.ReleaseInfo{}) - _ = r.Run(ctx, []string{"redshift-integration-test"}) + t.Run("Events flow", func(t *testing.T) { + httpPort, err := kithelper.GetFreePort() + require.NoError(t, err) - close(svcDone) - }() - t.Cleanup(func() { <-svcDone }) - t.Cleanup(cancel) + c := testcompose.New(t, compose.FilePaths([]string{"../testdata/docker-compose.jobsdb.yml"})) + c.Start(context.Background()) - serviceHealthEndpoint := fmt.Sprintf("http://localhost:%d/health", httpPort) - health.WaitUntilReady(ctx, t, serviceHealthEndpoint, time.Minute, time.Second, "serviceHealthEndpoint") - } + workspaceID := whutils.RandHex() + jobsDBPort := c.Port("jobsDb", 5432) - t.Run("Events flow", func(t *testing.T) { jobsDB := whth.JobsDB(t, jobsDBPort) testcase := []struct { name string - writeKey string - schema string - sourceID string - destinationID string tables []string stagingFilesEventsMap whth.EventsCountMap loadFilesEventsMap whth.EventsCountMap @@ -208,30 +114,28 @@ func TestIntegration(t *testing.T) { warehouseEventsMap2 whth.EventsCountMap sourceJob bool stagingFilePrefix string - preferAppend *bool jobRunID string useSameUserID bool - additionalEnvs map[string]string + additionalEnvs func(destinationID string) map[string]string + configOverride map[string]any }{ { name: "Upload Job", - writeKey: writeKey, - schema: namespace, tables: []string{"identifies", "users", "tracks", "product_track", "pages", "screens", "aliases", "groups"}, - sourceID: sourceID, - destinationID: destinationID, stagingFilePrefix: "testdata/upload-job", jobRunID: misc.FastUUID().String(), + configOverride: map[string]any{ + "host": credentials.Host, + "port": credentials.Port, + "user": credentials.UserName, + "password": credentials.Password, + }, }, { - name: "Append Mode", - writeKey: writeKey, + name: "Append Mode", tables: []string{ "identifies", "users", "tracks", "product_track", "pages", "screens", "aliases", "groups", }, - schema: namespace, - sourceID: sourceID, - destinationID: destinationID, warehouseEventsMap2: whth.EventsCountMap{ "identifies": 8, "users": 1, @@ -242,32 +146,37 @@ func TestIntegration(t *testing.T) { "aliases": 8, "groups": 8, }, - preferAppend: th.Ptr(true), stagingFilePrefix: "testdata/upload-job-append-mode", // an empty jobRunID means that the source is not an ETL one // see Uploader.CanAppend() jobRunID: "", useSameUserID: true, + configOverride: map[string]any{ + "preferAppend": true, + "host": credentials.Host, + "port": credentials.Port, + "user": credentials.UserName, + "password": credentials.Password, + }, }, { name: "IAM Upload Job", - writeKey: iamWriteKey, - schema: iamNamespace, tables: []string{"identifies", "users", "tracks", "product_track", 
"pages", "screens", "aliases", "groups"}, - sourceID: iamSourceID, - destinationID: iamDestinationID, stagingFilePrefix: "testdata/upload-job", jobRunID: misc.FastUUID().String(), + configOverride: map[string]any{ + "useIAMForAuth": true, + "user": credentials.IAMUserName, + "iamRoleARNForAuth": credentials.IAMRoleARN, + "clusterId": credentials.ClusterID, + "clusterRegion": credentials.ClusterRegion, + }, }, { - name: "IAM Append Mode", - writeKey: iamWriteKey, + name: "IAM Append Mode", tables: []string{ "identifies", "users", "tracks", "product_track", "pages", "screens", "aliases", "groups", }, - schema: iamNamespace, - sourceID: iamSourceID, - destinationID: iamDestinationID, warehouseEventsMap2: whth.EventsCountMap{ "identifies": 8, "users": 1, @@ -278,22 +187,25 @@ func TestIntegration(t *testing.T) { "aliases": 8, "groups": 8, }, - preferAppend: th.Ptr(true), stagingFilePrefix: "testdata/upload-job-append-mode", // an empty jobRunID means that the source is not an ETL one // see Uploader.CanAppend() jobRunID: "", useSameUserID: true, + configOverride: map[string]any{ + "preferAppend": true, + "useIAMForAuth": true, + "user": credentials.IAMUserName, + "iamRoleARNForAuth": credentials.IAMRoleARN, + "clusterId": credentials.ClusterID, + "clusterRegion": credentials.ClusterRegion, + }, }, { - name: "Undefined preferAppend", - writeKey: writeKey, + name: "Undefined preferAppend", tables: []string{ "identifies", "users", "tracks", "product_track", "pages", "screens", "aliases", "groups", }, - schema: namespace, - sourceID: sourceID, - destinationID: destinationID, warehouseEventsMap2: whth.EventsCountMap{ // let's use the same data as "testdata/upload-job-append-mode" // but then for the 2nd sync we expect 4 for each table instead of 8 due to the merge @@ -306,22 +218,23 @@ func TestIntegration(t *testing.T) { "aliases": 4, "groups": 4, }, - preferAppend: nil, // not defined in backend config stagingFilePrefix: "testdata/upload-job-append-mode", // an empty jobRunID means that the source is not an ETL one // see Uploader.CanAppend() jobRunID: "", useSameUserID: true, + configOverride: map[string]any{ + "host": credentials.Host, + "port": credentials.Port, + "user": credentials.UserName, + "password": credentials.Password, + }, }, { - name: "Append Users", - writeKey: writeKey, + name: "Append Users", tables: []string{ "identifies", "users", "tracks", "product_track", "pages", "screens", "aliases", "groups", }, - schema: namespace, - sourceID: sourceID, - destinationID: destinationID, warehouseEventsMap: whth.EventsCountMap{ // In the first sync we get 4 events for each table, 1 for users "identifies": 4, @@ -349,24 +262,28 @@ func TestIntegration(t *testing.T) { "aliases": 4, "groups": 4, }, - preferAppend: th.Ptr(true), stagingFilePrefix: "testdata/upload-job-append-mode", // we set the jobRunID to make sure the uploader says we cannot append! 
// same behaviour as redshift, see hyperverge users use case jobRunID: misc.FastUUID().String(), useSameUserID: true, - additionalEnvs: map[string]string{ - "RSERVER_WAREHOUSE_REDSHIFT_SKIP_DEDUP_DESTINATION_IDS": destinationID, - "RSERVER_WAREHOUSE_REDSHIFT_SKIP_COMPUTING_USER_LATEST_TRAITS": "true", + additionalEnvs: func(destinationID string) map[string]string { + return map[string]string{ + "RSERVER_WAREHOUSE_REDSHIFT_SKIP_DEDUP_DESTINATION_IDS": destinationID, + "RSERVER_WAREHOUSE_REDSHIFT_SKIP_COMPUTING_USER_LATEST_TRAITS": "true", + } + }, + configOverride: map[string]any{ + "preferAppend": true, + "host": credentials.Host, + "port": credentials.Port, + "user": credentials.UserName, + "password": credentials.Password, }, }, { name: "Source Job", - writeKey: sourcesWriteKey, - schema: sourcesNamespace, tables: []string{"tracks", "google_sheet"}, - sourceID: sourcesSourceID, - destinationID: sourcesDestinationID, stagingFilesEventsMap: whth.SourcesStagingFilesEventsMap(), loadFilesEventsMap: whth.SourcesLoadFilesEventsMap(), tableUploadsEventsMap: whth.SourcesTableUploadsEventsMap(), @@ -378,24 +295,71 @@ func TestIntegration(t *testing.T) { sourceJob: true, stagingFilePrefix: "testdata/sources-job", jobRunID: misc.FastUUID().String(), + configOverride: map[string]any{ + "host": credentials.Host, + "port": credentials.Port, + "user": credentials.UserName, + "password": credentials.Password, + }, }, } for _, tc := range testcase { t.Run(tc.name, func(t *testing.T) { - bootstrapSvc(t, tc.additionalEnvs, tc.preferAppend) + var ( + sourceID = whutils.RandHex() + destinationID = whutils.RandHex() + writeKey = whutils.RandHex() + namespace = whth.RandSchema(destType) + ) + + destinationBuilder := backendconfigtest.NewDestinationBuilder(destType). + WithID(destinationID). + WithRevisionID(destinationID). + WithConfigOption("database", credentials.DbName). + WithConfigOption("bucketName", credentials.BucketName). + WithConfigOption("accessKeyID", credentials.AccessKeyID). + WithConfigOption("accessKey", credentials.AccessKey). + WithConfigOption("namespace", namespace). + WithConfigOption("enableSSE", false). + WithConfigOption("useRudderStorage", false). + WithConfigOption("syncFrequency", "30") + for k, v := range tc.configOverride { + destinationBuilder = destinationBuilder.WithConfigOption(k, v) + } + + workspaceConfig := backendconfigtest.NewConfigBuilder(). + WithSource( + backendconfigtest.NewSourceBuilder(). + WithID(sourceID). + WithWriteKey(writeKey). + WithWorkspaceID(workspaceID). + WithConnection(destinationBuilder.Build()). + Build(), + ). + WithWorkspaceID(workspaceID). 
+ Build() + + t.Setenv("RSERVER_WAREHOUSE_REDSHIFT_MAX_PARALLEL_LOADS", "8") + t.Setenv("RSERVER_WAREHOUSE_REDSHIFT_ENABLE_DELETE_BY_JOBS", "true") + t.Setenv("RSERVER_WAREHOUSE_REDSHIFT_SLOW_QUERY_THRESHOLD", "0s") + if tc.additionalEnvs != nil { + for envKey, envValue := range tc.additionalEnvs(destinationID) { + t.Setenv(envKey, envValue) + } + } + whth.BootstrapSvc(t, workspaceConfig, httpPort, jobsDBPort) + + dsn := fmt.Sprintf("postgres://%s:%s@%s:%s/%s?sslmode=disable", + credentials.UserName, credentials.Password, credentials.Host, credentials.Port, credentials.DbName, + ) + db, err := sql.Open("postgres", dsn) + require.NoError(t, err) + require.NoError(t, db.Ping()) + t.Cleanup(func() { _ = db.Close() }) t.Cleanup(func() { - require.Eventually(t, func() bool { - if _, err := db.Exec(fmt.Sprintf(`DROP SCHEMA %q CASCADE;`, tc.schema)); err != nil { - t.Logf("error deleting schema: %v", err) - return false - } - return true - }, - time.Minute, - time.Second, - ) + dropSchema(t, db, namespace) }) sqlClient := &client.Client{ @@ -404,20 +368,20 @@ func TestIntegration(t *testing.T) { } conf := map[string]any{ - "bucketName": rsTestCredentials.BucketName, - "accessKeyID": rsTestCredentials.AccessKeyID, - "accessKey": rsTestCredentials.AccessKey, + "bucketName": credentials.BucketName, + "accessKeyID": credentials.AccessKeyID, + "accessKey": credentials.AccessKey, "enableSSE": false, "useRudderStorage": false, } t.Log("verifying test case 1") ts1 := whth.TestConfig{ - WriteKey: tc.writeKey, - Schema: tc.schema, + WriteKey: writeKey, + Schema: namespace, Tables: tc.tables, - SourceID: tc.sourceID, - DestinationID: tc.destinationID, + SourceID: sourceID, + DestinationID: destinationID, StagingFilesEventsMap: tc.stagingFilesEventsMap, LoadFilesEventsMap: tc.loadFilesEventsMap, TableUploadsEventsMap: tc.tableUploadsEventsMap, @@ -437,11 +401,11 @@ func TestIntegration(t *testing.T) { t.Log("verifying test case 2") ts2 := whth.TestConfig{ - WriteKey: tc.writeKey, - Schema: tc.schema, + WriteKey: writeKey, + Schema: namespace, Tables: tc.tables, - SourceID: tc.sourceID, - DestinationID: tc.destinationID, + SourceID: sourceID, + DestinationID: destinationID, StagingFilesEventsMap: tc.stagingFilesEventsMap, LoadFilesEventsMap: tc.loadFilesEventsMap, TableUploadsEventsMap: tc.tableUploadsEventsMap, @@ -467,6 +431,17 @@ func TestIntegration(t *testing.T) { }) t.Run("Validation", func(t *testing.T) { + namespace := whth.RandSchema(destType) + iamNamespace := whth.RandSchema(destType) + + dsn := fmt.Sprintf("postgres://%s:%s@%s:%s/%s?sslmode=disable", + credentials.UserName, credentials.Password, credentials.Host, credentials.Port, credentials.DbName, + ) + db, err := sql.Open("postgres", dsn) + require.NoError(t, err) + require.NoError(t, db.Ping()) + t.Cleanup(func() { _ = db.Close() }) + testCases := []struct { name string destination backendconfig.DestinationT @@ -474,16 +449,16 @@ func TestIntegration(t *testing.T) { { name: "With password", destination: backendconfig.DestinationT{ - ID: destinationID, + ID: "test_destination_id", Config: map[string]interface{}{ - "host": rsTestCredentials.Host, - "port": rsTestCredentials.Port, - "user": rsTestCredentials.UserName, - "password": rsTestCredentials.Password, - "database": rsTestCredentials.DbName, - "bucketName": rsTestCredentials.BucketName, - "accessKeyID": rsTestCredentials.AccessKeyID, - "accessKey": rsTestCredentials.AccessKey, + "host": credentials.Host, + "port": credentials.Port, + "user": credentials.UserName, + "password": 
credentials.Password, + "database": credentials.DbName, + "bucketName": credentials.BucketName, + "accessKeyID": credentials.AccessKeyID, + "accessKey": credentials.AccessKey, "namespace": namespace, "syncFrequency": "30", "enableSSE": false, @@ -497,24 +472,24 @@ func TestIntegration(t *testing.T) { Name: "redshift-demo", Enabled: true, RevisionID: "29HgOWobrn0RYZLpaSwPIbN2987", - WorkspaceID: workspaceID, + WorkspaceID: "test_workspace_id", }, }, { name: "with IAM Role", destination: backendconfig.DestinationT{ - ID: destinationID, + ID: "test_destination_id", Config: map[string]interface{}{ - "user": rsTestCredentials.IAMUserName, - "database": rsTestCredentials.DbName, - "bucketName": rsTestCredentials.BucketName, - "accessKeyID": rsTestCredentials.AccessKeyID, - "accessKey": rsTestCredentials.AccessKey, + "user": credentials.IAMUserName, + "database": credentials.DbName, + "bucketName": credentials.BucketName, + "accessKeyID": credentials.AccessKeyID, + "accessKey": credentials.AccessKey, "namespace": iamNamespace, "useIAMForAuth": true, - "iamRoleARNForAuth": rsTestCredentials.IAMRoleARN, - "clusterId": rsTestCredentials.ClusterID, - "clusterRegion": rsTestCredentials.ClusterRegion, + "iamRoleARNForAuth": credentials.IAMRoleARN, + "clusterId": credentials.ClusterID, + "clusterRegion": credentials.ClusterRegion, "syncFrequency": "30", "enableSSE": false, "useRudderStorage": false, @@ -527,7 +502,7 @@ func TestIntegration(t *testing.T) { Name: "redshift-demo", Enabled: true, RevisionID: "29HgOWobrn0RYZLpaSwPIbN2987", - WorkspaceID: workspaceID, + WorkspaceID: "test_workspace_id", }, }, } @@ -535,16 +510,7 @@ func TestIntegration(t *testing.T) { for _, tc := range testCases { t.Run(tc.name, func(t *testing.T) { t.Cleanup(func() { - require.Eventually(t, func() bool { - if _, err := db.Exec(fmt.Sprintf(`DROP SCHEMA %q CASCADE;`, tc.destination.Config["namespace"])); err != nil { - t.Logf("error deleting schema: %v", err) - return false - } - return true - }, - time.Minute, - time.Second, - ) + dropSchema(t, db, tc.destination.Config["namespace"].(string)) }) whth.VerifyConfigurationTest(t, tc.destination) @@ -553,15 +519,17 @@ func TestIntegration(t *testing.T) { }) t.Run("Load Table", func(t *testing.T) { - const ( - sourceID = "test_source_id" - destinationID = "test_destination_id" - workspaceID = "test_workspace_id" - ) - namespace := whth.RandSchema(destType) iamNamespace := whth.RandSchema(destType) + dsn := fmt.Sprintf("postgres://%s:%s@%s:%s/%s?sslmode=disable", + credentials.UserName, credentials.Password, credentials.Host, credentials.Port, credentials.DbName, + ) + db, err := sql.Open("postgres", dsn) + require.NoError(t, err) + require.NoError(t, db.Ping()) + t.Cleanup(func() { _ = db.Close() }) + testCases := []struct { name string warehouse model.Warehouse @@ -570,19 +538,19 @@ func TestIntegration(t *testing.T) { name: "With password", warehouse: model.Warehouse{ Source: backendconfig.SourceT{ - ID: sourceID, + ID: "test_source_id", }, Destination: backendconfig.DestinationT{ - ID: destinationID, + ID: "test_destination_id", Config: map[string]interface{}{ - "host": rsTestCredentials.Host, - "port": rsTestCredentials.Port, - "user": rsTestCredentials.UserName, - "password": rsTestCredentials.Password, - "database": rsTestCredentials.DbName, - "bucketName": rsTestCredentials.BucketName, - "accessKeyID": rsTestCredentials.AccessKeyID, - "accessKey": rsTestCredentials.AccessKey, + "host": credentials.Host, + "port": credentials.Port, + "user": credentials.UserName, + 
"password": credentials.Password, + "database": credentials.DbName, + "bucketName": credentials.BucketName, + "accessKeyID": credentials.AccessKeyID, + "accessKey": credentials.AccessKey, "namespace": namespace, "syncFrequency": "30", "enableSSE": false, @@ -596,9 +564,9 @@ func TestIntegration(t *testing.T) { Name: "redshift-demo", Enabled: true, RevisionID: "29HgOWobrn0RYZLpaSwPIbN2987", - WorkspaceID: workspaceID, + WorkspaceID: "test_workspace_id", }, - WorkspaceID: workspaceID, + WorkspaceID: "test_workspace_id", Namespace: namespace, }, }, @@ -606,21 +574,21 @@ func TestIntegration(t *testing.T) { name: "with IAM Role", warehouse: model.Warehouse{ Source: backendconfig.SourceT{ - ID: sourceID, + ID: "test_source_id", }, Destination: backendconfig.DestinationT{ - ID: destinationID, + ID: "test_destination_id", Config: map[string]interface{}{ - "user": rsTestCredentials.IAMUserName, - "database": rsTestCredentials.DbName, - "bucketName": rsTestCredentials.BucketName, - "accessKeyID": rsTestCredentials.AccessKeyID, - "accessKey": rsTestCredentials.AccessKey, + "user": credentials.IAMUserName, + "database": credentials.DbName, + "bucketName": credentials.BucketName, + "accessKeyID": credentials.AccessKeyID, + "accessKey": credentials.AccessKey, "namespace": iamNamespace, "useIAMForAuth": true, - "iamRoleARNForAuth": rsTestCredentials.IAMRoleARN, - "clusterId": rsTestCredentials.ClusterID, - "clusterRegion": rsTestCredentials.ClusterRegion, + "iamRoleARNForAuth": credentials.IAMRoleARN, + "clusterId": credentials.ClusterID, + "clusterRegion": credentials.ClusterRegion, "syncFrequency": "30", "enableSSE": false, "useRudderStorage": false, @@ -633,9 +601,9 @@ func TestIntegration(t *testing.T) { Name: "redshift-iam-demo", Enabled: true, RevisionID: "29HgOWobrn0RYZLpaSwPIbN2987", - WorkspaceID: workspaceID, + WorkspaceID: "test_workspace_id", }, - WorkspaceID: workspaceID, + WorkspaceID: "test_workspace_id", Namespace: iamNamespace, }, }, @@ -644,16 +612,7 @@ func TestIntegration(t *testing.T) { for _, tc := range testCases { t.Run(tc.name, func(t *testing.T) { t.Cleanup(func() { - require.Eventually(t, func() bool { - if _, err := db.Exec(fmt.Sprintf(`DROP SCHEMA %q CASCADE;`, tc.warehouse.Namespace)); err != nil { - t.Logf("error deleting schema: %v", err) - return false - } - return true - }, - time.Minute, - time.Second, - ) + dropSchema(t, db, tc.warehouse.Namespace) }) warehouse := tc.warehouse @@ -683,12 +642,12 @@ func TestIntegration(t *testing.T) { } fm, err := filemanager.New(&filemanager.Settings{ - Provider: warehouseutils.S3, + Provider: whutils.S3, Config: map[string]any{ - "bucketName": rsTestCredentials.BucketName, - "accessKeyID": rsTestCredentials.AccessKeyID, - "accessKey": rsTestCredentials.AccessKey, - "bucketProvider": warehouseutils.S3, + "bucketName": credentials.BucketName, + "accessKeyID": credentials.AccessKeyID, + "accessKey": credentials.AccessKey, + "bucketProvider": whutils.S3, }, }) require.NoError(t, err) @@ -699,8 +658,8 @@ func TestIntegration(t *testing.T) { uploadOutput := whth.UploadLoadFile(t, fm, "../testdata/load.csv.gz", tableName) - loadFiles := []warehouseutils.LoadFile{{Location: uploadOutput.Location}} - mockUploader := newMockUploader(t, loadFiles, tableName, schemaInUpload, schemaInWarehouse, warehouseutils.LoadFileTypeCsv) + loadFiles := []whutils.LoadFile{{Location: uploadOutput.Location}} + mockUploader := newMockUploader(t, loadFiles, tableName, schemaInUpload, schemaInWarehouse, whutils.LoadFileTypeCsv) rs := redshift.New(config.New(), 
logger.NOP, stats.NOP) err := rs.Setup(ctx, warehouse, mockUploader) @@ -716,8 +675,8 @@ func TestIntegration(t *testing.T) { uploadOutput := whth.UploadLoadFile(t, fm, "../testdata/load.csv.gz", tableName) - loadFiles := []warehouseutils.LoadFile{{Location: uploadOutput.Location}} - mockUploader := newMockUploader(t, loadFiles, tableName, schemaInUpload, schemaInWarehouse, warehouseutils.LoadFileTypeCsv) + loadFiles := []whutils.LoadFile{{Location: uploadOutput.Location}} + mockUploader := newMockUploader(t, loadFiles, tableName, schemaInUpload, schemaInWarehouse, whutils.LoadFileTypeCsv) rs := redshift.New(config.New(), logger.NOP, stats.NOP) err := rs.Setup(ctx, warehouse, mockUploader) @@ -736,8 +695,8 @@ func TestIntegration(t *testing.T) { tableName := "merge_without_dedup_test_table" uploadOutput := whth.UploadLoadFile(t, fm, "../testdata/dedup.csv.gz", tableName) - loadFiles := []warehouseutils.LoadFile{{Location: uploadOutput.Location}} - mockUploader := newMockUploader(t, loadFiles, tableName, schemaInUpload, schemaInWarehouse, warehouseutils.LoadFileTypeCsv) + loadFiles := []whutils.LoadFile{{Location: uploadOutput.Location}} + mockUploader := newMockUploader(t, loadFiles, tableName, schemaInUpload, schemaInWarehouse, whutils.LoadFileTypeCsv) appendWarehouse := th.Clone(t, warehouse) appendWarehouse.Destination.Config[model.PreferAppendSetting.String()] = true @@ -790,8 +749,8 @@ func TestIntegration(t *testing.T) { tableName := "merge_with_dedup_test_table" uploadOutput := whth.UploadLoadFile(t, fm, "../testdata/dedup.csv.gz", tableName) - loadFiles := []warehouseutils.LoadFile{{Location: uploadOutput.Location}} - mockUploader := newMockUploader(t, loadFiles, tableName, schemaInUpload, schemaInWarehouse, warehouseutils.LoadFileTypeCsv) + loadFiles := []whutils.LoadFile{{Location: uploadOutput.Location}} + mockUploader := newMockUploader(t, loadFiles, tableName, schemaInUpload, schemaInWarehouse, whutils.LoadFileTypeCsv) d := redshift.New(config.New(), logger.NOP, stats.NOP) err := d.Setup(ctx, warehouse, mockUploader) @@ -841,8 +800,8 @@ func TestIntegration(t *testing.T) { tableName := "merge_with_dedup_window_test_table" uploadOutput := whth.UploadLoadFile(t, fm, "../testdata/dedup.csv.gz", tableName) - loadFiles := []warehouseutils.LoadFile{{Location: uploadOutput.Location}} - mockUploader := newMockUploader(t, loadFiles, tableName, schemaInUpload, schemaInWarehouse, warehouseutils.LoadFileTypeCsv) + loadFiles := []whutils.LoadFile{{Location: uploadOutput.Location}} + mockUploader := newMockUploader(t, loadFiles, tableName, schemaInUpload, schemaInWarehouse, whutils.LoadFileTypeCsv) c := config.New() c.Set("Warehouse.redshift.dedupWindow", true) @@ -896,8 +855,8 @@ func TestIntegration(t *testing.T) { tableName := "merge_with_short_dedup_window_test_table" uploadOutput := whth.UploadLoadFile(t, fm, "../testdata/dedup.csv.gz", tableName) - loadFiles := []warehouseutils.LoadFile{{Location: uploadOutput.Location}} - mockUploader := newMockUploader(t, loadFiles, tableName, schemaInUpload, schemaInWarehouse, warehouseutils.LoadFileTypeCsv) + loadFiles := []whutils.LoadFile{{Location: uploadOutput.Location}} + mockUploader := newMockUploader(t, loadFiles, tableName, schemaInUpload, schemaInWarehouse, whutils.LoadFileTypeCsv) c := config.New() c.Set("Warehouse.redshift.dedupWindow", true) @@ -952,11 +911,11 @@ func TestIntegration(t *testing.T) { uploadOutput := whth.UploadLoadFile(t, fm, "../testdata/load.csv.gz", tableName) - loadFiles := []warehouseutils.LoadFile{{Location: 
uploadOutput.Location}} - mockUploader := newMockUploader(t, loadFiles, tableName, schemaInUpload, schemaInWarehouse, warehouseutils.LoadFileTypeCsv) + loadFiles := []whutils.LoadFile{{Location: uploadOutput.Location}} + mockUploader := newMockUploader(t, loadFiles, tableName, schemaInUpload, schemaInWarehouse, whutils.LoadFileTypeCsv) c := config.New() - c.Set("Warehouse.redshift.skipDedupDestinationIDs", []string{destinationID}) + c.Set("Warehouse.redshift.skipDedupDestinationIDs", []string{"test_destination_id"}) appendWarehouse := th.Clone(t, warehouse) appendWarehouse.Destination.Config[model.PreferAppendSetting.String()] = true @@ -1008,10 +967,10 @@ func TestIntegration(t *testing.T) { ctx := context.Background() tableName := "load_file_not_exists_test_table" - loadFiles := []warehouseutils.LoadFile{{ + loadFiles := []whutils.LoadFile{{ Location: "https://bucket.s3.amazonaws.com/rudder-warehouse-load-objects/load_file_not_exists_test_table/test_source_id/0ef75cb0-3fd0-4408-98b9-2bea9e476916-load_file_not_exists_test_table/load.csv.gz", }} - mockUploader := newMockUploader(t, loadFiles, tableName, schemaInUpload, schemaInWarehouse, warehouseutils.LoadFileTypeCsv) + mockUploader := newMockUploader(t, loadFiles, tableName, schemaInUpload, schemaInWarehouse, whutils.LoadFileTypeCsv) rs := redshift.New(config.New(), logger.NOP, stats.NOP) err := rs.Setup(ctx, warehouse, mockUploader) @@ -1033,8 +992,8 @@ func TestIntegration(t *testing.T) { uploadOutput := whth.UploadLoadFile(t, fm, "../testdata/mismatch-columns.csv.gz", tableName) - loadFiles := []warehouseutils.LoadFile{{Location: uploadOutput.Location}} - mockUploader := newMockUploader(t, loadFiles, tableName, schemaInUpload, schemaInWarehouse, warehouseutils.LoadFileTypeCsv) + loadFiles := []whutils.LoadFile{{Location: uploadOutput.Location}} + mockUploader := newMockUploader(t, loadFiles, tableName, schemaInUpload, schemaInWarehouse, whutils.LoadFileTypeCsv) rs := redshift.New(config.New(), logger.NOP, stats.NOP) err := rs.Setup(ctx, warehouse, mockUploader) @@ -1056,8 +1015,8 @@ func TestIntegration(t *testing.T) { uploadOutput := whth.UploadLoadFile(t, fm, "../testdata/mismatch-schema.csv.gz", tableName) - loadFiles := []warehouseutils.LoadFile{{Location: uploadOutput.Location}} - mockUploader := newMockUploader(t, loadFiles, tableName, schemaInUpload, schemaInWarehouse, warehouseutils.LoadFileTypeCsv) + loadFiles := []whutils.LoadFile{{Location: uploadOutput.Location}} + mockUploader := newMockUploader(t, loadFiles, tableName, schemaInUpload, schemaInWarehouse, whutils.LoadFileTypeCsv) rs := redshift.New(config.New(), logger.NOP, stats.NOP) err := rs.Setup(ctx, warehouse, mockUploader) @@ -1075,12 +1034,12 @@ func TestIntegration(t *testing.T) { }) t.Run("discards", func(t *testing.T) { ctx := context.Background() - tableName := warehouseutils.DiscardsTable + tableName := whutils.DiscardsTable uploadOutput := whth.UploadLoadFile(t, fm, "../testdata/discards.csv.gz", tableName) - loadFiles := []warehouseutils.LoadFile{{Location: uploadOutput.Location}} - mockUploader := newMockUploader(t, loadFiles, tableName, warehouseutils.DiscardsSchema, warehouseutils.DiscardsSchema, warehouseutils.LoadFileTypeCsv) + loadFiles := []whutils.LoadFile{{Location: uploadOutput.Location}} + mockUploader := newMockUploader(t, loadFiles, tableName, whutils.DiscardsSchema, whutils.DiscardsSchema, whutils.LoadFileTypeCsv) rs := redshift.New(config.New(), logger.NOP, stats.NOP) err := rs.Setup(ctx, warehouse, mockUploader) @@ -1089,7 +1048,7 @@ func 
TestIntegration(t *testing.T) { err = rs.CreateSchema(ctx) require.NoError(t, err) - err = rs.CreateTable(ctx, tableName, warehouseutils.DiscardsSchema) + err = rs.CreateTable(ctx, tableName, whutils.DiscardsSchema) require.NoError(t, err) loadTableStat, err := rs.LoadTable(ctx, tableName) @@ -1128,11 +1087,11 @@ func TestIntegration(t *testing.T) { fileStat, err := os.Stat("../testdata/load.parquet") require.NoError(t, err) - loadFiles := []warehouseutils.LoadFile{{ + loadFiles := []whutils.LoadFile{{ Location: uploadOutput.Location, Metadata: json.RawMessage(fmt.Sprintf(`{"content_length": %d}`, fileStat.Size())), }} - mockUploader := newMockUploader(t, loadFiles, tableName, schemaInUpload, schemaInUpload, warehouseutils.LoadFileTypeParquet) + mockUploader := newMockUploader(t, loadFiles, tableName, schemaInUpload, schemaInUpload, whutils.LoadFileTypeParquet) rs := redshift.New(config.New(), logger.NOP, stats.NOP) err = rs.Setup(ctx, warehouse, mockUploader) @@ -1175,8 +1134,8 @@ func TestIntegration(t *testing.T) { t.Run("crashRecover", func(t *testing.T) { ctx := context.Background() tableName := "crash_recovery_test_table" - stgTableName := warehouseutils.StagingTableName(warehouseutils.RS, tableName, 64) - mockUploader := newMockUploader(t, nil, tableName, schemaInUpload, schemaInUpload, warehouseutils.LoadFileTypeParquet) + stgTableName := whutils.StagingTableName(destType, tableName, 64) + mockUploader := newMockUploader(t, nil, tableName, schemaInUpload, schemaInUpload, whutils.LoadFileTypeParquet) rs := redshift.New(config.New(), logger.NOP, stats.NOP) err := rs.Setup(ctx, warehouse, mockUploader) @@ -1220,27 +1179,23 @@ func TestIntegration(t *testing.T) { }) t.Run("Connection timeout using password", func(t *testing.T) { - const ( - sourceID = "test_source_id" - destinationID = "test_destination_id" - workspaceID = "test_workspace_id" - ) + namespace := whth.RandSchema(destType) warehouse := model.Warehouse{ Source: backendconfig.SourceT{ - ID: sourceID, + ID: "test_source_id", }, Destination: backendconfig.DestinationT{ - ID: destinationID, + ID: "test_destination_id", Config: map[string]interface{}{ - "host": rsTestCredentials.Host, - "port": rsTestCredentials.Port, - "user": rsTestCredentials.UserName, - "password": rsTestCredentials.Password, - "database": rsTestCredentials.DbName, - "bucketName": rsTestCredentials.BucketName, - "accessKeyID": rsTestCredentials.AccessKeyID, - "accessKey": rsTestCredentials.AccessKey, + "host": credentials.Host, + "port": credentials.Port, + "user": credentials.UserName, + "password": credentials.Password, + "database": credentials.DbName, + "bucketName": credentials.BucketName, + "accessKeyID": credentials.AccessKeyID, + "accessKey": credentials.AccessKey, "namespace": namespace, "syncFrequency": "30", "enableSSE": false, @@ -1254,9 +1209,9 @@ func TestIntegration(t *testing.T) { Name: "redshift-demo", Enabled: true, RevisionID: "29HgOWobrn0RYZLpaSwPIbN2987", - WorkspaceID: workspaceID, + WorkspaceID: "test_workspace_id", }, - WorkspaceID: workspaceID, + WorkspaceID: "test_workspace_id", Namespace: namespace, } mockCtrl := gomock.NewController(t) @@ -1291,6 +1246,24 @@ func TestIntegration(t *testing.T) { }) } +func dropSchema(t *testing.T, db *sql.DB, namespace string) { + t.Helper() + t.Log("dropping schema", namespace) + + require.Eventually(t, + func() bool { + _, err := db.ExecContext(context.Background(), fmt.Sprintf(`DROP SCHEMA %q CASCADE;`, namespace)) + if err != nil { + t.Logf("error deleting schema %q: %v", namespace, err) + 
return false + } + return true + }, + time.Minute, + time.Second, + ) +} + func TestRedshift_ShouldMerge(t *testing.T) { testCases := []struct { name string @@ -1539,19 +1512,19 @@ func TestRedshift_AlterColumn(t *testing.T) { func newMockUploader( t testing.TB, - loadFiles []warehouseutils.LoadFile, + loadFiles []whutils.LoadFile, tableName string, schemaInUpload model.TableSchema, schemaInWarehouse model.TableSchema, loadFileType string, -) warehouseutils.Uploader { +) whutils.Uploader { ctrl := gomock.NewController(t) t.Cleanup(ctrl.Finish) mockUploader := mockuploader.NewMockUploader(ctrl) mockUploader.EXPECT().UseRudderStorage().Return(false).AnyTimes() mockUploader.EXPECT().GetLoadFilesMetadata(gomock.Any(), gomock.Any()).DoAndReturn( - func(ctx context.Context, options warehouseutils.GetLoadFilesOptions) ([]warehouseutils.LoadFile, error) { + func(ctx context.Context, options whutils.GetLoadFilesOptions) ([]whutils.LoadFile, error) { return slices.Clone(loadFiles), nil }, ).AnyTimes() diff --git a/warehouse/integrations/redshift/testdata/template.json b/warehouse/integrations/redshift/testdata/template.json deleted file mode 100644 index 86bf8104d60..00000000000 --- a/warehouse/integrations/redshift/testdata/template.json +++ /dev/null @@ -1,385 +0,0 @@ -{ - "enableMetrics": false, - "workspaceId": "{{.workspaceID}}", - "sources": [ - { - "config": { - "isSampleSource": true, - "eventUpload": false, - "eventUploadTS": 1653346591988 - }, - "liveEventsConfig": { - "eventUpload": false, - "eventUploadTS": 1653346591988 - }, - "id": "{{.sourceID}}", - "name": "redshift-integration", - "writeKey": "{{.writeKey}}", - "enabled": true, - "sourceDefinitionId": "1dCzCUAtpWDzNxgGUYzq9sZdZZB", - "createdBy": "279Kz3NhcXsUAx2KHnx1HKhdtbk", - "workspaceId": "{{.workspaceID}}", - "deleted": false, - "createdAt": "2022-03-31T11:45:57.339Z", - "updatedAt": "2022-05-23T22:56:32.000Z", - "destinations": [ - { - "config": { - {{.preferAppend}} - "host": "{{.host}}", - "port": "{{.port}}", - "database": "{{.database}}", - "user": "{{.user}}", - "password": "{{.password}}", - "bucketName": "{{.bucketName}}", - "accessKeyID": "{{.accessKeyID}}", - "accessKey": "{{.accessKey}}", - "prefix": "", - "namespace": "{{.namespace}}", - "syncFrequency": "30", - "enableSSE": false, - "useRudderStorage": false - }, - "liveEventsConfig": {}, - "secretConfig": {}, - "id": "{{.destinationID}}", - "name": "redshift-demo", - "enabled": true, - "workspaceId": "{{.workspaceID}}", - "deleted": false, - "createdAt": "2022-04-07T09:57:14.783Z", - "updatedAt": "2022-05-17T08:16:32.613Z", - "revisionId": "{{.destinationID}}", - "transformations": [], - "destinationDefinition": { - "config": { - "destConfig": { - "defaultConfig": [ - "host", - "port", - "database", - "user", - "password", - "bucketName", - "accessKeyID", - "accessKey", - "prefix", - "namespace", - "syncFrequency", - "syncStartAt", - "enableSSE", - "excludeWindow", - "useRudderStorage" - ] - }, - "secretKeys": [ - "password", - "accessKeyID", - "accessKey" - ], - "excludeKeys": [], - "includeKeys": [], - "transformAt": "processor", - "transformAtV1": "processor", - "supportedSourceTypes": [ - "android", - "ios", - "web", - "unity", - "amp", - "cloud", - "reactnative", - "cloudSource", - "flutter", - "cordova" - ], - "saveDestinationResponse": true - }, - "responseRules": null, - "options": null, - "id": "1UVZiJF7OgLaiIY2Jts8XOQE3M6", - "name": "RS", - "displayName": "Redshift", - "category": "warehouse", - "createdAt": "2019-12-04T07:17:28.337Z", - "updatedAt": 
"2022-02-08T06:46:22.804Z" - }, - "isConnectionEnabled": true, - "isProcessorEnabled": true - } - ], - "sourceDefinition": { - "options": null, - "id": "1dCzCUAtpWDzNxgGUYzq9sZdZZB", - "name": "HTTP", - "displayName": "HTTP", - "category": "", - "createdAt": "2020-06-12T06:35:35.962Z", - "updatedAt": "2020-06-12T06:35:35.962Z" - }, - "dgSourceTrackingPlanConfig": null - }, - { - "config": { - "isSampleSource": true, - "eventUpload": false, - "eventUploadTS": 1653346591988 - }, - "liveEventsConfig": { - "eventUpload": false, - "eventUploadTS": 1653346591988 - }, - "id": "{{.iamSourceID}}", - "name": "redshift-iam-integration", - "writeKey": "{{.iamWriteKey}}", - "enabled": true, - "sourceDefinitionId": "1dCzCUAtpWDzNxgGUYzq9sZdZZB", - "createdBy": "279Kz3NhcXsUAx2KHnx1HKhdtbk", - "workspaceId": "{{.workspaceID}}", - "deleted": false, - "createdAt": "2022-03-31T11:45:57.339Z", - "updatedAt": "2022-05-23T22:56:32.000Z", - "destinations": [ - { - "config": { - {{.preferAppend}} - "database": "{{.database}}", - "user": "{{.iamUser}}", - "bucketName": "{{.bucketName}}", - "accessKeyID": "{{.accessKeyID}}", - "accessKey": "{{.accessKey}}", - "prefix": "", - "namespace": "{{.iamNamespace}}", - "syncFrequency": "30", - "useIAMForAuth": true, - "iamRoleARNForAuth": "{{.iamRoleARNForAuth}}", - "clusterId": "{{.clusterID}}", - "clusterRegion": "{{.clusterRegion}}", - "enableSSE": false, - "useRudderStorage": false - }, - "liveEventsConfig": {}, - "secretConfig": {}, - "id": "{{.iamDestinationID}}", - "name": "redshift-iam-demo", - "enabled": true, - "workspaceId": "{{.workspaceID}}", - "deleted": false, - "createdAt": "2022-04-07T09:57:14.783Z", - "updatedAt": "2022-05-17T08:16:32.613Z", - "revisionId": "{{.iamDestinationID}}", - "transformations": [], - "destinationDefinition": { - "config": { - "destConfig": { - "defaultConfig": [ - "host", - "port", - "database", - "user", - "useIAMForAuth", - "password", - "iamRoleARNForAuth", - "clusterId", - "clusterRegion", - "bucketName", - "useIAMForAuth", - "accessKeyID", - "accessKey", - "prefix", - "namespace", - "syncFrequency", - "syncStartAt", - "enableSSE", - "excludeWindow", - "useRudderStorage" - ] - }, - "secretKeys": [ - "password", - "accessKeyID", - "accessKey" - ], - "excludeKeys": [], - "includeKeys": [], - "transformAt": "processor", - "transformAtV1": "processor", - "supportedSourceTypes": [ - "android", - "ios", - "web", - "unity", - "amp", - "cloud", - "reactnative", - "cloudSource", - "flutter", - "cordova" - ], - "saveDestinationResponse": true - }, - "responseRules": null, - "options": null, - "id": "1UVZiJF7OgLaiIY2Jts8XOQE3M6", - "name": "RS", - "displayName": "Redshift", - "category": "warehouse", - "createdAt": "2019-12-04T07:17:28.337Z", - "updatedAt": "2022-02-08T06:46:22.804Z" - }, - "isConnectionEnabled": true, - "isProcessorEnabled": true - } - ], - "sourceDefinition": { - "options": null, - "id": "1dCzCUAtpWDzNxgGUYzq9sZdZZB", - "name": "HTTP", - "displayName": "HTTP", - "category": "", - "createdAt": "2020-06-12T06:35:35.962Z", - "updatedAt": "2020-06-12T06:35:35.962Z" - }, - "dgSourceTrackingPlanConfig": null - }, - { - "config": { - "config": { - "row_batch_size": 200, - "credentials": { - "auth_type": "Client", - "accountId": "29hOyXzmdF9rz7yR2FTq4pohyXL" - }, - "spreadsheet_id": "1bKQpN-KkhYZd4eqUUoq3Tec6HrJzgqSc8jwVvajnpk8" - }, - "schedule": { - "type": "manual", - "every": 0, - "unit": "minutes" - }, - "prefix": "SGS5" - }, - "liveEventsConfig": {}, - "id": "{{.sourcesSourceID}}", - "name": 
"redshift-sources-integration", - "writeKey": "{{.sourcesWriteKey}}", - "enabled": true, - "sourceDefinitionId": "29seNpaVfhMp7YVpiBUszPOvmO1", - "createdBy": "279BPpjT6BGqKKhT5qAZuUVZa1h", - "workspaceId": "{{.workspaceID}}", - "deleted": false, - "transient": false, - "secretVersion": null, - "createdAt": "2022-08-23T00:21:18.366Z", - "updatedAt": "2022-08-23T00:21:18.366Z", - "sourceDefinition": { - "options": { - "auth": { - "provider": "Google", - "oauthRole": "google_sheets" - }, - "image": "source-google-sheets:v2", - "isBeta": true - }, - "id": "29seNpaVfhMp7YVpiBUszPOvmO1", - "name": "singer-google-sheets", - "displayName": "Singer Google Sheets", - "category": "singer-protocol", - "createdAt": "2022-05-30T04:53:02.188Z", - "updatedAt": "2022-05-30T04:53:02.188Z" - }, - "destinations": [ - { - "config": { - {{.preferAppend}} - "host": "{{.host}}", - "port": "{{.port}}", - "database": "{{.database}}", - "user": "{{.user}}", - "password": "{{.password}}", - "bucketName": "{{.bucketName}}", - "accessKeyID": "{{.accessKeyID}}", - "accessKey": "{{.accessKey}}", - "prefix": "", - "namespace": "{{.sourcesNamespace}}", - "syncFrequency": "30", - "enableSSE": false, - "useRudderStorage": false - }, - "liveEventsConfig": {}, - "secretConfig": {}, - "id": "{{.sourcesDestinationID}}", - "name": "redshift-sources-demo", - "enabled": true, - "workspaceId": "{{.workspaceID}}", - "deleted": false, - "createdAt": "2022-04-07T09:57:14.783Z", - "updatedAt": "2022-05-17T08:16:32.613Z", - "revisionId": "{{.sourcesDestinationID}}", - "transformations": [], - "destinationDefinition": { - "config": { - "destConfig": { - "defaultConfig": [ - "host", - "port", - "database", - "user", - "password", - "bucketName", - "accessKeyID", - "accessKey", - "prefix", - "namespace", - "syncFrequency", - "syncStartAt", - "enableSSE", - "excludeWindow", - "useRudderStorage" - ] - }, - "secretKeys": [ - "password", - "accessKeyID", - "accessKey" - ], - "excludeKeys": [], - "includeKeys": [], - "transformAt": "processor", - "transformAtV1": "processor", - "supportedSourceTypes": [ - "android", - "ios", - "web", - "unity", - "amp", - "cloud", - "reactnative", - "cloudSource", - "flutter", - "cordova" - ], - "saveDestinationResponse": true - }, - "responseRules": null, - "options": null, - "id": "1UVZiJF7OgLaiIY2Jts8XOQE3M6", - "name": "RS", - "displayName": "Redshift", - "category": "warehouse", - "createdAt": "2019-12-04T07:17:28.337Z", - "updatedAt": "2022-02-08T06:46:22.804Z" - }, - "isConnectionEnabled": true, - "isProcessorEnabled": true - } - ], - "dgSourceTrackingPlanConfig": null - } - ], - "libraries": [ - { - "versionId": "23Uxw7QEiOg8e0KkQV8LmNfWaWh" - } - ] -} diff --git a/warehouse/integrations/snowflake/snowflake_test.go b/warehouse/integrations/snowflake/snowflake_test.go index 242df22fa65..5a861b092ef 100644 --- a/warehouse/integrations/snowflake/snowflake_test.go +++ b/warehouse/integrations/snowflake/snowflake_test.go @@ -2,12 +2,12 @@ package snowflake_test import ( "context" + "database/sql" "encoding/json" "errors" "fmt" "os" "slices" - "strconv" "strings" "testing" "time" @@ -31,15 +31,13 @@ import ( kithelper "github.com/rudderlabs/rudder-go-kit/testhelper" backendconfig "github.com/rudderlabs/rudder-server/backend-config" - "github.com/rudderlabs/rudder-server/runner" th "github.com/rudderlabs/rudder-server/testhelper" - "github.com/rudderlabs/rudder-server/testhelper/health" - "github.com/rudderlabs/rudder-server/testhelper/workspaceConfig" + 
"github.com/rudderlabs/rudder-server/testhelper/backendconfigtest" "github.com/rudderlabs/rudder-server/utils/misc" "github.com/rudderlabs/rudder-server/warehouse/client" "github.com/rudderlabs/rudder-server/warehouse/integrations/middleware/sqlquerywrapper" "github.com/rudderlabs/rudder-server/warehouse/integrations/snowflake" - "github.com/rudderlabs/rudder-server/warehouse/integrations/testhelper" + whth "github.com/rudderlabs/rudder-server/warehouse/integrations/testhelper" mockuploader "github.com/rudderlabs/rudder-server/warehouse/internal/mocks/utils" "github.com/rudderlabs/rudder-server/warehouse/internal/model" whutils "github.com/rudderlabs/rudder-server/warehouse/utils" @@ -86,319 +84,198 @@ func TestIntegration(t *testing.T) { if os.Getenv("SLOW") != "1" { t.Skip("Skipping tests. Add 'SLOW=1' env var to run test.") } - for _, key := range []string{ - testKey, - testRBACKey, - testKeyPairEncrypted, - testKeyPairUnencrypted, - } { - if _, exists := os.LookupEnv(key); !exists { - t.Skipf("Skipping %s as %s is not set", t.Name(), key) + if _, exists := os.LookupEnv(testKey); !exists { + if os.Getenv("FORCE_RUN_INTEGRATION_TESTS") == "true" { + t.Fatalf("%s environment variable not set", testKey) } + t.Skipf("Skipping %s as %s is not set", t.Name(), testKey) } - credentials, err := getSnowflakeTestCredentials(testKey) - require.NoError(t, err) - - rbacCredentials, err := getSnowflakeTestCredentials(testRBACKey) - require.NoError(t, err) - - credentialsKeyPairEncrypted, err := getSnowflakeTestCredentials(testKeyPairEncrypted) - require.NoError(t, err) - - credentialsKeyPairUnencrypted, err := getSnowflakeTestCredentials(testKeyPairUnencrypted) - require.NoError(t, err) - - c := testcompose.New(t, compose.FilePaths([]string{"../testdata/docker-compose.jobsdb.yml"})) - c.Start(context.Background()) - misc.Init() validations.Init() whutils.Init() - jobsDBPort := c.Port("jobsDb", 5432) + destType := whutils.SNOWFLAKE - httpPort, err := kithelper.GetFreePort() + credentials, err := getSnowflakeTestCredentials(testKey) require.NoError(t, err) - workspaceID := whutils.RandHex() - sourceID := whutils.RandHex() - destinationID := whutils.RandHex() - writeKey := whutils.RandHex() - caseSensitiveSourceID := whutils.RandHex() - caseSensitiveDestinationID := whutils.RandHex() - caseSensitiveWriteKey := whutils.RandHex() - rbacSourceID := whutils.RandHex() - rbacDestinationID := whutils.RandHex() - rbacWriteKey := whutils.RandHex() - sourcesSourceID := whutils.RandHex() - sourcesDestinationID := whutils.RandHex() - sourcesWriteKey := whutils.RandHex() - keypairEncryptedSourceID := whutils.RandHex() - keypairEncryptedDestinationID := whutils.RandHex() - keypairEncryptedWriteKey := whutils.RandHex() - keypairUnencryptedSourceID := whutils.RandHex() - keypairUnencryptedDestinationID := whutils.RandHex() - keypairUnencryptedWriteKey := whutils.RandHex() - - destType := whutils.SNOWFLAKE - - namespace := testhelper.RandSchema(destType) - rbacNamespace := testhelper.RandSchema(destType) - sourcesNamespace := testhelper.RandSchema(destType) - caseSensitiveNamespace := testhelper.RandSchema(destType) - keypairEncryptedNamespace := testhelper.RandSchema(destType) - keypairUnencryptedNamespace := testhelper.RandSchema(destType) - - bootstrapSvc := func(t testing.TB, preferAppend *bool) { - var preferAppendStr string - if preferAppend != nil { - preferAppendStr = fmt.Sprintf(`"preferAppend": %v,`, *preferAppend) - } - templateConfigurations := map[string]any{ - "workspaceID": workspaceID, - "sourceID": 
sourceID, - "destinationID": destinationID, - "writeKey": writeKey, - "caseSensitiveSourceID": caseSensitiveSourceID, - "caseSensitiveDestinationID": caseSensitiveDestinationID, - "caseSensitiveWriteKey": caseSensitiveWriteKey, - "rbacSourceID": rbacSourceID, - "rbacDestinationID": rbacDestinationID, - "rbacWriteKey": rbacWriteKey, - "sourcesSourceID": sourcesSourceID, - "sourcesDestinationID": sourcesDestinationID, - "sourcesWriteKey": sourcesWriteKey, - "keypairEncryptedSourceID": keypairEncryptedSourceID, - "keypairEncryptedDestinationID": keypairEncryptedDestinationID, - "keypairEncryptedWriteKey": keypairEncryptedWriteKey, - "keypairUnencryptedSourceID": keypairUnencryptedSourceID, - "keypairUnencryptedDestinationID": keypairUnencryptedDestinationID, - "keypairUnencryptedWriteKey": keypairUnencryptedWriteKey, - "account": credentials.Account, - "user": credentials.User, - "password": credentials.Password, - "database": credentials.Database, - "caseSensitiveDatabase": strings.ToLower(credentials.Database), - "warehouse": credentials.Warehouse, - "bucketName": credentials.BucketName, - "accessKeyID": credentials.AccessKeyID, - "accessKey": credentials.AccessKey, - "namespace": namespace, - "sourcesNamespace": sourcesNamespace, - "caseSensitiveNamespace": caseSensitiveNamespace, - "keypairEncryptedNamespace": keypairEncryptedNamespace, - "keypairUnencryptedNamespace": keypairUnencryptedNamespace, - "rbacNamespace": rbacNamespace, - "rbacAccount": rbacCredentials.Account, - "rbacUser": rbacCredentials.User, - "rbacPassword": rbacCredentials.Password, - "rbacRole": rbacCredentials.Role, - "rbacDatabase": rbacCredentials.Database, - "rbacWarehouse": rbacCredentials.Warehouse, - "rbacBucketName": rbacCredentials.BucketName, - "rbacAccessKeyID": rbacCredentials.AccessKeyID, - "rbacAccessKey": rbacCredentials.AccessKey, - "keypairEncryptedUser": credentialsKeyPairEncrypted.User, - "keypairEncryptedPrivateKey": strings.ReplaceAll(credentialsKeyPairEncrypted.PrivateKey, "\n", "\\n"), - "keypairEncryptedPassphrase": credentialsKeyPairEncrypted.PrivateKeyPassphrase, - "keypairUnencryptedUser": credentialsKeyPairUnencrypted.User, - "keypairUnencryptedPrivateKey": strings.ReplaceAll(credentialsKeyPairUnencrypted.PrivateKey, "\n", "\\n"), - "preferAppend": preferAppendStr, + t.Run("Event flow", func(t *testing.T) { + for _, key := range []string{ + testRBACKey, + testKeyPairEncrypted, + testKeyPairUnencrypted, + } { + if _, exists := os.LookupEnv(key); !exists { + if os.Getenv("FORCE_RUN_INTEGRATION_TESTS") == "true" { + t.Fatalf("%s environment variable not set", key) + } + t.Skipf("Skipping %s as %s is not set", t.Name(), key) + } } - workspaceConfigPath := workspaceConfig.CreateTempFile(t, "testdata/template.json", templateConfigurations) - - testhelper.EnhanceWithDefaultEnvs(t) - t.Setenv("JOBS_DB_PORT", strconv.Itoa(jobsDBPort)) - t.Setenv("WAREHOUSE_JOBS_DB_PORT", strconv.Itoa(jobsDBPort)) - t.Setenv("RSERVER_WAREHOUSE_SNOWFLAKE_MAX_PARALLEL_LOADS", "8") - t.Setenv("RSERVER_WAREHOUSE_SNOWFLAKE_ENABLE_DELETE_BY_JOBS", "true") - t.Setenv("RSERVER_WAREHOUSE_WEB_PORT", strconv.Itoa(httpPort)) - t.Setenv("RSERVER_BACKEND_CONFIG_CONFIG_JSONPATH", workspaceConfigPath) - t.Setenv("RSERVER_WAREHOUSE_SNOWFLAKE_SLOW_QUERY_THRESHOLD", "0s") - t.Setenv("RSERVER_WAREHOUSE_SNOWFLAKE_DEBUG_DUPLICATE_WORKSPACE_IDS", workspaceID) - t.Setenv("RSERVER_WAREHOUSE_SNOWFLAKE_DEBUG_DUPLICATE_TABLES", strings.Join( - []string{ - "identifies", "users", "tracks", "product_track", "pages", "screens", "aliases", "groups", - 
}, - " ", - )) - ctx, cancel := context.WithCancel(context.Background()) - svcDone := make(chan struct{}) + rbacCredentials, err := getSnowflakeTestCredentials(testRBACKey) + require.NoError(t, err) + keyPairEncryptedCredentials, err := getSnowflakeTestCredentials(testKeyPairEncrypted) + require.NoError(t, err) + keyPairUnEncryptedCredentials, err := getSnowflakeTestCredentials(testKeyPairUnencrypted) + require.NoError(t, err) - go func() { - r := runner.New(runner.ReleaseInfo{}) - _ = r.Run(ctx, []string{"snowflake-integration-test"}) - close(svcDone) - }() + httpPort, err := kithelper.GetFreePort() + require.NoError(t, err) - t.Cleanup(func() { <-svcDone }) - t.Cleanup(cancel) + c := testcompose.New(t, compose.FilePaths([]string{"../testdata/docker-compose.jobsdb.yml"})) + c.Start(context.Background()) - serviceHealthEndpoint := fmt.Sprintf("http://localhost:%d/health", httpPort) - health.WaitUntilReady(ctx, t, serviceHealthEndpoint, time.Minute, 100*time.Millisecond, "serviceHealthEndpoint") - } + workspaceID := whutils.RandHex() + jobsDBPort := c.Port("jobsDb", 5432) - t.Run("Event flow", func(t *testing.T) { - jobsDB := testhelper.JobsDB(t, jobsDBPort) - - database := credentials.Database + jobsDB := whth.JobsDB(t, jobsDBPort) testcase := []struct { name string - writeKey string - schema string - sourceID string - destinationID string tables []string - stagingFilesEventsMap testhelper.EventsCountMap - stagingFilesModifiedEventsMap testhelper.EventsCountMap - loadFilesEventsMap testhelper.EventsCountMap - tableUploadsEventsMap testhelper.EventsCountMap - warehouseEventsMap testhelper.EventsCountMap - warehouseEventsMap2 testhelper.EventsCountMap + stagingFilesEventsMap whth.EventsCountMap + stagingFilesModifiedEventsMap whth.EventsCountMap + loadFilesEventsMap whth.EventsCountMap + tableUploadsEventsMap whth.EventsCountMap + warehouseEventsMap whth.EventsCountMap + warehouseEventsMap2 whth.EventsCountMap cred *testCredentials database string sourceJob bool stagingFilePrefix string emptyJobRunID bool - preferAppend *bool customUserID string + configOverride map[string]any }{ { - name: "Upload Job with Normal Database", - writeKey: writeKey, - schema: namespace, + name: "Upload Job with Normal Database", tables: []string{ "identifies", "users", "tracks", "product_track", "pages", "screens", "aliases", "groups", }, - sourceID: sourceID, - destinationID: destinationID, - cred: credentials, - database: database, - stagingFilesEventsMap: testhelper.EventsCountMap{ + cred: credentials, + database: credentials.Database, + stagingFilesEventsMap: whth.EventsCountMap{ "wh_staging_files": 34, // 32 + 2 (merge events because of ID resolution) }, - stagingFilesModifiedEventsMap: testhelper.EventsCountMap{ + stagingFilesModifiedEventsMap: whth.EventsCountMap{ "wh_staging_files": 34, // 32 + 2 (merge events because of ID resolution) }, stagingFilePrefix: "testdata/upload-job", - preferAppend: th.Ptr(false), + configOverride: map[string]any{ + "preferAppend": false, + "password": credentials.Password, + }, }, { - name: "Upload Job with Role", - writeKey: rbacWriteKey, - schema: rbacNamespace, + name: "Upload Job with Role", tables: []string{ "identifies", "users", "tracks", "product_track", "pages", "screens", "aliases", "groups", }, - sourceID: rbacSourceID, - destinationID: rbacDestinationID, - cred: rbacCredentials, - database: database, - stagingFilesEventsMap: testhelper.EventsCountMap{ + cred: rbacCredentials, + database: rbacCredentials.Database, + stagingFilesEventsMap: whth.EventsCountMap{ 
"wh_staging_files": 34, // 32 + 2 (merge events because of ID resolution) }, - stagingFilesModifiedEventsMap: testhelper.EventsCountMap{ + stagingFilesModifiedEventsMap: whth.EventsCountMap{ "wh_staging_files": 34, // 32 + 2 (merge events because of ID resolution) }, stagingFilePrefix: "testdata/upload-job-with-role", - preferAppend: th.Ptr(false), + configOverride: map[string]any{ + "preferAppend": false, + "role": rbacCredentials.Role, + "password": rbacCredentials.Password, + }, }, { - name: "Upload Job with Case Sensitive Database", - writeKey: caseSensitiveWriteKey, - schema: caseSensitiveNamespace, + name: "Upload Job with Case Sensitive Database", tables: []string{ "identifies", "users", "tracks", "product_track", "pages", "screens", "aliases", "groups", }, - sourceID: caseSensitiveSourceID, - destinationID: caseSensitiveDestinationID, - cred: credentials, - database: strings.ToLower(database), - stagingFilesEventsMap: testhelper.EventsCountMap{ + cred: credentials, + database: strings.ToLower(credentials.Database), + stagingFilesEventsMap: whth.EventsCountMap{ "wh_staging_files": 34, // 32 + 2 (merge events because of ID resolution) }, - stagingFilesModifiedEventsMap: testhelper.EventsCountMap{ + stagingFilesModifiedEventsMap: whth.EventsCountMap{ "wh_staging_files": 34, // 32 + 2 (merge events because of ID resolution) }, stagingFilePrefix: "testdata/upload-job-case-sensitive", - preferAppend: th.Ptr(false), + configOverride: map[string]any{ + "preferAppend": false, + "password": credentials.Password, + }, }, { - name: "Upload Job with Key Pair Unencrypted Key", - writeKey: keypairUnencryptedWriteKey, - schema: keypairUnencryptedNamespace, + name: "Upload Job with Key Pair Unencrypted Key", tables: []string{ "identifies", "users", "tracks", "product_track", "pages", "screens", "aliases", "groups", }, - sourceID: keypairUnencryptedSourceID, - destinationID: keypairUnencryptedDestinationID, - cred: credentialsKeyPairUnencrypted, - database: strings.ToLower(database), - stagingFilesEventsMap: testhelper.EventsCountMap{ + cred: keyPairUnEncryptedCredentials, + database: keyPairUnEncryptedCredentials.Database, + stagingFilesEventsMap: whth.EventsCountMap{ "wh_staging_files": 34, // 32 + 2 (merge events because of ID resolution) }, - stagingFilesModifiedEventsMap: testhelper.EventsCountMap{ + stagingFilesModifiedEventsMap: whth.EventsCountMap{ "wh_staging_files": 34, // 32 + 2 (merge events because of ID resolution) }, stagingFilePrefix: "testdata/upload-job-case-sensitive", - preferAppend: th.Ptr(false), + configOverride: map[string]any{ + "preferAppend": false, + "useKeyPairAuth": true, + "privateKey": keyPairUnEncryptedCredentials.PrivateKey, + }, }, { - name: "Upload Job with Key Pair Encrypted Key", - writeKey: keypairEncryptedWriteKey, - schema: keypairEncryptedNamespace, + name: "Upload Job with Key Pair Encrypted Key", tables: []string{ "identifies", "users", "tracks", "product_track", "pages", "screens", "aliases", "groups", }, - sourceID: keypairEncryptedSourceID, - destinationID: keypairEncryptedDestinationID, - cred: credentialsKeyPairEncrypted, - database: strings.ToLower(database), - stagingFilesEventsMap: testhelper.EventsCountMap{ + cred: keyPairEncryptedCredentials, + database: keyPairEncryptedCredentials.Database, + stagingFilesEventsMap: whth.EventsCountMap{ "wh_staging_files": 34, // 32 + 2 (merge events because of ID resolution) }, - stagingFilesModifiedEventsMap: testhelper.EventsCountMap{ + stagingFilesModifiedEventsMap: whth.EventsCountMap{ "wh_staging_files": 
34, // 32 + 2 (merge events because of ID resolution) }, stagingFilePrefix: "testdata/upload-job-case-sensitive", - preferAppend: th.Ptr(false), + configOverride: map[string]any{ + "preferAppend": false, + "useKeyPairAuth": true, + "privateKey": keyPairEncryptedCredentials.PrivateKey, + "privateKeyPassphrase": keyPairEncryptedCredentials.PrivateKeyPassphrase, + }, }, { - name: "Source Job with Sources", - writeKey: sourcesWriteKey, - schema: sourcesNamespace, - tables: []string{"tracks", "google_sheet"}, - sourceID: sourcesSourceID, - destinationID: sourcesDestinationID, - cred: credentials, - database: database, - stagingFilesEventsMap: testhelper.EventsCountMap{ + name: "Source Job with Sources", + tables: []string{"tracks", "google_sheet"}, + cred: credentials, + database: credentials.Database, + stagingFilesEventsMap: whth.EventsCountMap{ "wh_staging_files": 9, // 8 + 1 (merge events because of ID resolution) }, - stagingFilesModifiedEventsMap: testhelper.EventsCountMap{ + stagingFilesModifiedEventsMap: whth.EventsCountMap{ "wh_staging_files": 8, // 8 (de-duped by encounteredMergeRuleMap) }, - loadFilesEventsMap: testhelper.SourcesLoadFilesEventsMap(), - tableUploadsEventsMap: testhelper.SourcesTableUploadsEventsMap(), - warehouseEventsMap: testhelper.SourcesWarehouseEventsMap(), + loadFilesEventsMap: whth.SourcesLoadFilesEventsMap(), + tableUploadsEventsMap: whth.SourcesTableUploadsEventsMap(), + warehouseEventsMap: whth.SourcesWarehouseEventsMap(), sourceJob: true, stagingFilePrefix: "testdata/sources-job", - preferAppend: th.Ptr(false), + configOverride: map[string]any{ + "preferAppend": false, + "password": credentials.Password, + }, }, { name: "Upload Job in append mode", - writeKey: writeKey, - schema: namespace, tables: []string{"identifies", "users", "tracks"}, - sourceID: sourceID, - destinationID: destinationID, cred: credentials, - database: database, - stagingFilesEventsMap: testhelper.EventsCountMap{"wh_staging_files": 3}, - stagingFilesModifiedEventsMap: testhelper.EventsCountMap{"wh_staging_files": 3}, + database: credentials.Database, + stagingFilesEventsMap: whth.EventsCountMap{"wh_staging_files": 3}, + stagingFilesModifiedEventsMap: whth.EventsCountMap{"wh_staging_files": 3}, loadFilesEventsMap: map[string]int{"identifies": 1, "users": 1, "tracks": 1}, tableUploadsEventsMap: map[string]int{"identifies": 1, "users": 1, "tracks": 1}, warehouseEventsMap: map[string]int{"identifies": 1, "users": 1, "tracks": 1}, @@ -407,37 +284,86 @@ func TestIntegration(t *testing.T) { // an empty jobRunID means that the source is not an ETL one // see Uploader.CanAppend() emptyJobRunID: true, - preferAppend: th.Ptr(true), - customUserID: testhelper.GetUserId("append_test"), + configOverride: map[string]any{ + "preferAppend": true, + "password": credentials.Password, + }, + customUserID: whth.GetUserId("append_test"), }, { - name: "Undefined preferAppend", - writeKey: writeKey, - schema: namespace, + name: "Undefined preferAppend", tables: []string{ "identifies", "users", "tracks", "product_track", "pages", "screens", "aliases", "groups", }, - sourceID: sourceID, - destinationID: destinationID, - cred: credentials, - database: database, - stagingFilesEventsMap: testhelper.EventsCountMap{ + cred: credentials, + database: credentials.Database, + stagingFilesEventsMap: whth.EventsCountMap{ "wh_staging_files": 34, // 32 + 2 (merge events because of ID resolution) }, - stagingFilesModifiedEventsMap: testhelper.EventsCountMap{ + stagingFilesModifiedEventsMap: whth.EventsCountMap{ 
"wh_staging_files": 34, // 32 + 2 (merge events because of ID resolution) }, stagingFilePrefix: "testdata/upload-job-undefined-preferAppend-mode", - preferAppend: nil, // not defined in backend config + configOverride: map[string]any{ + "password": credentials.Password, + }, }, } for _, tc := range testcase { - tc := tc t.Run(tc.name, func(t *testing.T) { - bootstrapSvc(t, tc.preferAppend) + var ( + sourceID = whutils.RandHex() + destinationID = whutils.RandHex() + writeKey = whutils.RandHex() + namespace = whth.RandSchema(destType) + ) + + destinationBuilder := backendconfigtest.NewDestinationBuilder(destType). + WithID(destinationID). + WithRevisionID(destinationID). + WithConfigOption("account", tc.cred.Account). + WithConfigOption("database", tc.database). + WithConfigOption("warehouse", tc.cred.Warehouse). + WithConfigOption("user", tc.cred.User). + WithConfigOption("cloudProvider", "AWS"). + WithConfigOption("bucketName", tc.cred.BucketName). + WithConfigOption("accessKeyID", tc.cred.AccessKeyID). + WithConfigOption("accessKey", tc.cred.AccessKey). + WithConfigOption("namespace", namespace). + WithConfigOption("enableSSE", false). + WithConfigOption("useRudderStorage", false). + WithConfigOption("syncFrequency", "30") + for k, v := range tc.configOverride { + destinationBuilder = destinationBuilder.WithConfigOption(k, v) + } - data := sqlconnectconfig.Snowflake{ + workspaceConfig := backendconfigtest.NewConfigBuilder(). + WithSource( + backendconfigtest.NewSourceBuilder(). + WithID(sourceID). + WithWriteKey(writeKey). + WithWorkspaceID(workspaceID). + WithConnection(destinationBuilder.Build()). + Build(), + ). + WithWorkspaceID(workspaceID). + Build() + + t.Setenv("RSERVER_WAREHOUSE_SNOWFLAKE_MAX_PARALLEL_LOADS", "8") + t.Setenv("RSERVER_WAREHOUSE_SNOWFLAKE_ENABLE_DELETE_BY_JOBS", "true") + t.Setenv("RSERVER_WAREHOUSE_SNOWFLAKE_SLOW_QUERY_THRESHOLD", "0s") + t.Setenv("RSERVER_WAREHOUSE_SNOWFLAKE_DEBUG_DUPLICATE_WORKSPACE_IDS", workspaceID) + t.Setenv("RSERVER_WAREHOUSE_SNOWFLAKE_DEBUG_DUPLICATE_TABLES", strings.Join( + []string{ + "identifies", "users", "tracks", "product_track", "pages", "screens", "aliases", "groups", + }, + " ", + )) + + whth.BootstrapSvc(t, workspaceConfig, httpPort, jobsDBPort) + + credentialsJSON, err := json.Marshal(sqlconnectconfig.Snowflake{ Account: tc.cred.Account, User: tc.cred.User, Role: tc.cred.Role, @@ -447,26 +373,17 @@ func TestIntegration(t *testing.T) { UseKeyPairAuth: tc.cred.UseKeyPairAuth, PrivateKey: tc.cred.PrivateKey, PrivateKeyPassphrase: tc.cred.PrivateKeyPassphrase, - } - - credentialsJSON, err := json.Marshal(data) + }) require.NoError(t, err) sqlConnectDB, err := sqlconnect.NewDB("snowflake", credentialsJSON) require.NoError(t, err) db := sqlConnectDB.SqlDB() - + require.NoError(t, db.Ping()) + t.Cleanup(func() { _ = db.Close() }) t.Cleanup(func() { - var err error - require.Eventuallyf(t, - func() bool { - _, err = db.Exec(fmt.Sprintf(`DROP SCHEMA %q CASCADE;`, tc.schema)) - return err == nil - }, - time.Minute, 100*time.Millisecond, - "error deleting schema: %v", err, - ) + dropSchema(t, db, namespace) }) sqlClient := &client.Client{ @@ -488,18 +405,18 @@ func TestIntegration(t *testing.T) { t.Log("verifying test case 1") userID := tc.customUserID if userID == "" { - userID = testhelper.GetUserId(destType) + userID = whth.GetUserId(destType) } jobRunID := "" if !tc.emptyJobRunID { jobRunID = misc.FastUUID().String() } - ts1 := testhelper.TestConfig{ - WriteKey: tc.writeKey, - Schema: tc.schema, + ts1 := whth.TestConfig{ + WriteKey: 
writeKey, + Schema: namespace, Tables: tc.tables, - SourceID: tc.sourceID, - DestinationID: tc.destinationID, + SourceID: sourceID, + DestinationID: destinationID, StagingFilesEventsMap: tc.stagingFilesEventsMap, LoadFilesEventsMap: tc.loadFilesEventsMap, TableUploadsEventsMap: tc.tableUploadsEventsMap, @@ -520,7 +437,7 @@ func TestIntegration(t *testing.T) { t.Log("verifying test case 2") userID = tc.customUserID if userID == "" { - userID = testhelper.GetUserId(destType) + userID = whth.GetUserId(destType) } jobRunID = "" if !tc.emptyJobRunID { @@ -530,12 +447,12 @@ func TestIntegration(t *testing.T) { if whEventsMap == nil { whEventsMap = tc.warehouseEventsMap } - ts2 := testhelper.TestConfig{ - WriteKey: tc.writeKey, - Schema: tc.schema, + ts2 := whth.TestConfig{ + WriteKey: writeKey, + Schema: namespace, Tables: tc.tables, - SourceID: tc.sourceID, - DestinationID: tc.destinationID, + SourceID: sourceID, + DestinationID: destinationID, StagingFilesEventsMap: tc.stagingFilesModifiedEventsMap, LoadFilesEventsMap: tc.loadFilesEventsMap, TableUploadsEventsMap: tc.tableUploadsEventsMap, @@ -561,37 +478,30 @@ func TestIntegration(t *testing.T) { }) t.Run("Validation", func(t *testing.T) { - data := sqlconnectconfig.Snowflake{ + namespace := whth.RandSchema(destType) + + credentialsJSON, err := json.Marshal(sqlconnectconfig.Snowflake{ Account: credentials.Account, User: credentials.User, Role: credentials.Role, Password: credentials.Password, DBName: credentials.Database, Warehouse: credentials.Warehouse, - } - - credentialsJSON, err := json.Marshal(data) + }) require.NoError(t, err) sqlConnectDB, err := sqlconnect.NewDB("snowflake", credentialsJSON) require.NoError(t, err) db := sqlConnectDB.SqlDB() - + require.NoError(t, db.Ping()) + t.Cleanup(func() { _ = db.Close() }) t.Cleanup(func() { - var err error - require.Eventuallyf(t, - func() bool { - _, err = db.Exec(fmt.Sprintf(`DROP SCHEMA %q CASCADE;`, namespace)) - return err == nil - }, - time.Minute, 100*time.Millisecond, - "error deleting schema: %v", err, - ) + dropSchema(t, db, namespace) }) dest := backendconfig.DestinationT{ - ID: destinationID, + ID: "test_destination_id", Config: map[string]interface{}{ "account": credentials.Account, "database": credentials.Database, @@ -617,32 +527,23 @@ func TestIntegration(t *testing.T) { }, Name: "snowflake-demo", Enabled: true, - RevisionID: destinationID, + RevisionID: "test_destination_id", } - testhelper.VerifyConfigurationTest(t, dest) + whth.VerifyConfigurationTest(t, dest) }) t.Run("Load Table", func(t *testing.T) { - const ( - sourceID = "test_source_id" - destinationID = "test_destination_id" - workspaceID = "test_workspace_id" - ) - - namespace := testhelper.RandSchema(destType) - ctx := context.Background() + namespace := whth.RandSchema(destType) - data := sqlconnectconfig.Snowflake{ + credentialsJSON, err := json.Marshal(sqlconnectconfig.Snowflake{ Account: credentials.Account, User: credentials.User, Role: credentials.Role, Password: credentials.Password, DBName: credentials.Database, Warehouse: credentials.Warehouse, - } - - credentialsJSON, err := json.Marshal(data) + }) require.NoError(t, err) sqlConnectDB, err := sqlconnect.NewDB("snowflake", credentialsJSON) @@ -650,18 +551,9 @@ func TestIntegration(t *testing.T) { db := sqlConnectDB.SqlDB() require.NoError(t, db.Ping()) - + t.Cleanup(func() { _ = db.Close() }) t.Cleanup(func() { - require.Eventually(t, func() bool { - if _, err := db.Exec(fmt.Sprintf(`DROP SCHEMA %q CASCADE;`, namespace)); err != nil { - t.Logf("error 
deleting schema: %v", err) - return false - } - return true - }, - time.Minute, - time.Second, - ) + dropSchema(t, db, namespace) }) schemaInUpload := model.TableSchema{ @@ -690,10 +582,10 @@ func TestIntegration(t *testing.T) { warehouse := model.Warehouse{ Source: backendconfig.SourceT{ - ID: sourceID, + ID: "test_source_id", }, Destination: backendconfig.DestinationT{ - ID: destinationID, + ID: "test_destination_id", DestinationDefinition: backendconfig.DestinationDefinitionT{ Name: destType, }, @@ -711,7 +603,7 @@ func TestIntegration(t *testing.T) { "namespace": namespace, }, }, - WorkspaceID: workspaceID, + WorkspaceID: "test_workspace_id", Namespace: namespace, } @@ -727,9 +619,9 @@ func TestIntegration(t *testing.T) { require.NoError(t, err) t.Run("schema does not exists", func(t *testing.T) { - tableName := whutils.ToProviderCase(whutils.SNOWFLAKE, "schema_not_exists_test_table") + tableName := whutils.ToProviderCase(destType, "schema_not_exists_test_table") - uploadOutput := testhelper.UploadLoadFile(t, fm, "../testdata/load.csv.gz", tableName) + uploadOutput := whth.UploadLoadFile(t, fm, "../testdata/load.csv.gz", tableName) loadFiles := []whutils.LoadFile{{Location: uploadOutput.Location}} mockUploader := newMockUploader(t, loadFiles, tableName, schemaInUpload, schemaInWarehouse, false, false) @@ -743,9 +635,9 @@ func TestIntegration(t *testing.T) { require.Nil(t, loadTableStat) }) t.Run("table does not exists", func(t *testing.T) { - tableName := whutils.ToProviderCase(whutils.SNOWFLAKE, "table_not_exists_test_table") + tableName := whutils.ToProviderCase(destType, "table_not_exists_test_table") - uploadOutput := testhelper.UploadLoadFile(t, fm, "../testdata/load.csv.gz", tableName) + uploadOutput := whth.UploadLoadFile(t, fm, "../testdata/load.csv.gz", tableName) loadFiles := []whutils.LoadFile{{Location: uploadOutput.Location}} mockUploader := newMockUploader(t, loadFiles, tableName, schemaInUpload, schemaInWarehouse, false, false) @@ -762,10 +654,10 @@ func TestIntegration(t *testing.T) { require.Nil(t, loadTableStat) }) t.Run("merge", func(t *testing.T) { - tableName := whutils.ToProviderCase(whutils.SNOWFLAKE, "merge_test_table") + tableName := whutils.ToProviderCase(destType, "merge_test_table") t.Run("without dedup", func(t *testing.T) { - uploadOutput := testhelper.UploadLoadFile(t, fm, "../testdata/load.csv.gz", tableName) + uploadOutput := whth.UploadLoadFile(t, fm, "../testdata/load.csv.gz", tableName) loadFiles := []whutils.LoadFile{{Location: uploadOutput.Location}} mockUploader := newMockUploader(t, loadFiles, tableName, schemaInUpload, schemaInWarehouse, true, false) @@ -795,7 +687,7 @@ func TestIntegration(t *testing.T) { require.Equal(t, loadTableStat.RowsUpdated, int64(0), "2nd copy on the same table with the same data should not have any 'rows_loaded'") - records := testhelper.RetrieveRecordsFromWarehouse(t, sf.DB.DB, + records := whth.RetrieveRecordsFromWarehouse(t, sf.DB.DB, fmt.Sprintf( `SELECT id, @@ -811,10 +703,10 @@ func TestIntegration(t *testing.T) { tableName, ), ) - require.Equal(t, testhelper.SampleTestRecords(), records) + require.Equal(t, whth.SampleTestRecords(), records) }) t.Run("with dedup use new record", func(t *testing.T) { - uploadOutput := testhelper.UploadLoadFile(t, fm, "../testdata/dedup.csv.gz", tableName) + uploadOutput := whth.UploadLoadFile(t, fm, "../testdata/dedup.csv.gz", tableName) loadFiles := []whutils.LoadFile{{Location: uploadOutput.Location}} mockUploader := newMockUploader(t, loadFiles, tableName, schemaInUpload, 
schemaInWarehouse, false, true) @@ -834,7 +726,7 @@ func TestIntegration(t *testing.T) { require.Equal(t, loadTableStat.RowsInserted, int64(0)) require.Equal(t, loadTableStat.RowsUpdated, int64(14)) - records := testhelper.RetrieveRecordsFromWarehouse(t, db, + records := whth.RetrieveRecordsFromWarehouse(t, db, fmt.Sprintf(` SELECT id, @@ -853,14 +745,14 @@ func TestIntegration(t *testing.T) { tableName, ), ) - require.Equal(t, records, testhelper.DedupTestRecords()) + require.Equal(t, records, whth.DedupTestRecords()) }) }) t.Run("append", func(t *testing.T) { - tableName := whutils.ToProviderCase(whutils.SNOWFLAKE, "append_test_table") + tableName := whutils.ToProviderCase(destType, "append_test_table") run := func() { - uploadOutput := testhelper.UploadLoadFile(t, fm, "../testdata/load.csv.gz", tableName) + uploadOutput := whth.UploadLoadFile(t, fm, "../testdata/load.csv.gz", tableName) loadFiles := []whutils.LoadFile{{Location: uploadOutput.Location}} mockUploader := newMockUploader(t, loadFiles, tableName, schemaInUpload, schemaInWarehouse, true, false) @@ -895,7 +787,7 @@ func TestIntegration(t *testing.T) { run() run() - records := testhelper.RetrieveRecordsFromWarehouse(t, db, + records := whth.RetrieveRecordsFromWarehouse(t, db, fmt.Sprintf(` SELECT id, @@ -914,10 +806,10 @@ func TestIntegration(t *testing.T) { tableName, ), ) - require.Equal(t, records, testhelper.AppendTestRecords()) + require.Equal(t, records, whth.AppendTestRecords()) }) t.Run("load file does not exists", func(t *testing.T) { - tableName := whutils.ToProviderCase(whutils.SNOWFLAKE, "load_file_not_exists_test_table") + tableName := whutils.ToProviderCase(destType, "load_file_not_exists_test_table") loadFiles := []whutils.LoadFile{{ Location: "https://bucket.s3.amazonaws.com/rudder-warehouse-load-objects/load_file_not_exists_test_table/test_source_id/0ef75cb0-3fd0-4408-98b9-2bea9e476916-load_file_not_exists_test_table/load.csv.gz", @@ -939,9 +831,9 @@ func TestIntegration(t *testing.T) { require.Nil(t, loadTableStat) }) t.Run("mismatch in number of columns", func(t *testing.T) { - tableName := whutils.ToProviderCase(whutils.SNOWFLAKE, "mismatch_columns_test_table") + tableName := whutils.ToProviderCase(destType, "mismatch_columns_test_table") - uploadOutput := testhelper.UploadLoadFile(t, fm, "../testdata/mismatch-columns.csv.gz", tableName) + uploadOutput := whth.UploadLoadFile(t, fm, "../testdata/mismatch-columns.csv.gz", tableName) loadFiles := []whutils.LoadFile{{Location: uploadOutput.Location}} mockUploader := newMockUploader(t, loadFiles, tableName, schemaInUpload, schemaInWarehouse, false, false) @@ -961,7 +853,7 @@ func TestIntegration(t *testing.T) { require.Equal(t, loadTableStat.RowsInserted, int64(14)) require.Equal(t, loadTableStat.RowsUpdated, int64(0)) - records := testhelper.RetrieveRecordsFromWarehouse(t, sf.DB.DB, + records := whth.RetrieveRecordsFromWarehouse(t, sf.DB.DB, fmt.Sprintf(` SELECT id, @@ -980,12 +872,12 @@ func TestIntegration(t *testing.T) { tableName, ), ) - require.Equal(t, records, testhelper.SampleTestRecords()) + require.Equal(t, records, whth.SampleTestRecords()) }) t.Run("mismatch in schema", func(t *testing.T) { - tableName := whutils.ToProviderCase(whutils.SNOWFLAKE, "mismatch_schema_test_table") + tableName := whutils.ToProviderCase(destType, "mismatch_schema_test_table") - uploadOutput := testhelper.UploadLoadFile(t, fm, "../testdata/mismatch-schema.csv.gz", tableName) + uploadOutput := whth.UploadLoadFile(t, fm, "../testdata/mismatch-schema.csv.gz", tableName) 
loadFiles := []whutils.LoadFile{{Location: uploadOutput.Location}} mockUploader := newMockUploader(t, loadFiles, tableName, schemaInUpload, schemaInWarehouse, false, false) @@ -1005,12 +897,12 @@ func TestIntegration(t *testing.T) { require.Nil(t, loadTableStat) }) t.Run("discards", func(t *testing.T) { - tableName := whutils.ToProviderCase(whutils.SNOWFLAKE, whutils.DiscardsTable) + tableName := whutils.ToProviderCase(destType, whutils.DiscardsTable) - uploadOutput := testhelper.UploadLoadFile(t, fm, "../testdata/discards.csv.gz", tableName) + uploadOutput := whth.UploadLoadFile(t, fm, "../testdata/discards.csv.gz", tableName) discardsSchema := lo.MapKeys(whutils.DiscardsSchema, func(_, key string) string { - return whutils.ToProviderCase(whutils.SNOWFLAKE, key) + return whutils.ToProviderCase(destType, key) }) loadFiles := []whutils.LoadFile{{Location: uploadOutput.Location}} @@ -1031,7 +923,7 @@ func TestIntegration(t *testing.T) { require.Equal(t, loadTableStat.RowsInserted, int64(6)) require.Equal(t, loadTableStat.RowsUpdated, int64(0)) - records := testhelper.RetrieveRecordsFromWarehouse(t, sf.DB.DB, + records := whth.RetrieveRecordsFromWarehouse(t, sf.DB.DB, fmt.Sprintf(` SELECT COLUMN_NAME, @@ -1048,9 +940,116 @@ func TestIntegration(t *testing.T) { tableName, ), ) - require.Equal(t, records, testhelper.DiscardTestRecords()) + require.Equal(t, records, whth.DiscardTestRecords()) }) }) + + t.Run("Delete By", func(t *testing.T) { + ctx := context.Background() + namespace := whth.RandSchema(destType) + + credentialsJSON, err := json.Marshal(sqlconnectconfig.Snowflake{ + Account: credentials.Account, + User: credentials.User, + Role: credentials.Role, + Password: credentials.Password, + DBName: credentials.Database, + Warehouse: credentials.Warehouse, + }) + require.NoError(t, err) + + sqlConnectDB, err := sqlconnect.NewDB("snowflake", credentialsJSON) + require.NoError(t, err) + + db := sqlConnectDB.SqlDB() + require.NoError(t, db.Ping()) + t.Cleanup(func() { _ = db.Close() }) + t.Cleanup(func() { + dropSchema(t, db, namespace) + }) + + conf := config.New() + conf.Set("Warehouse.snowflake.enableDeleteByJobs", true) + + sf := snowflake.New(conf, logger.NOP, stats.NOP) + sf.DB = sqlquerywrapper.New(db) + sf.Namespace = namespace + + now := time.Now() + + _, err = sf.DB.ExecContext(ctx, fmt.Sprintf(`CREATE SCHEMA IF NOT EXISTS %s`, namespace)) + require.NoError(t, err, "should create schema") + + _, err = sf.DB.ExecContext(ctx, "CREATE TABLE "+namespace+".TEST_TABLE (id INT, context_sources_job_run_id STRING, context_sources_task_run_id STRING, context_source_id STRING, received_at DATETIME)") + require.NoError(t, err, "should create table") + + _, err = sf.DB.ExecContext(ctx, "INSERT INTO "+namespace+".TEST_TABLE VALUES (1, 'job_run_id_2', 'task_run_id_1_2', 'source_id_1', ?)", now.Add(-time.Hour)) + require.NoError(t, err, "should insert records") + _, err = sf.DB.ExecContext(ctx, "INSERT INTO "+namespace+".TEST_TABLE VALUES (2, 'job_run_id_2', 'task_run_id_1', 'source_id_2', ?)", now.Add(-time.Hour)) + require.NoError(t, err, "should insert records") + + require.NoError(t, sf.DeleteBy(ctx, []string{"TEST_TABLE"}, whutils.DeleteByParams{ + SourceId: "source_id_1", + JobRunId: "new_job_run_id", + TaskRunId: "new_task_job_run_id", + StartTime: now, + }), "should delete records") + + rows, err := sf.DB.QueryContext(ctx, "SELECT id FROM "+namespace+".TEST_TABLE") + require.NoError(t, err, "should see a successful query for ids") + + var recordIDs []int + for rows.Next() { + var id int + 
err := rows.Scan(&id) + require.NoError(t, err, "should scan rows") + + recordIDs = append(recordIDs, id) + } + require.NoError(t, rows.Err()) + require.Equal(t, []int{2}, recordIDs, "got the correct set of ids after deletion") + + require.NoError(t, sf.DeleteBy(ctx, []string{"TEST_TABLE"}, whutils.DeleteByParams{ + SourceId: "source_id_2", + JobRunId: "new_job_run_id", + TaskRunId: "new_task_job_run_id", + StartTime: time.Time{}, + }), "delete should succeed even if start time is zero value - no records must be deleted") + + rows, err = sf.DB.QueryContext(ctx, "SELECT id FROM "+namespace+".TEST_TABLE") + require.NoError(t, err, "should see a successful query for ids") + + var ids1 []int + for rows.Next() { + var id int + err := rows.Scan(&id) + require.NoError(t, err, "should scan rows") + + ids1 = append(ids1, id) + } + require.NoError(t, rows.Err()) + require.Equal(t, []int{2}, ids1, "got the same set of ids after deletion") + + require.NoError(t, sf.DeleteBy(ctx, []string{"TEST_TABLE"}, whutils.DeleteByParams{ + SourceId: "source_id_2", + JobRunId: "new_job_run_id", + TaskRunId: "new_task_job_run_id", + StartTime: now, + }), "should delete records") + + rows, err = sf.DB.QueryContext(ctx, "SELECT id FROM "+namespace+".TEST_TABLE") + require.NoError(t, err, "should see a successful query for ids") + var ids2 []int + for rows.Next() { + var id int + err := rows.Scan(&id) + require.NoError(t, err, "should scan rows") + + ids2 = append(ids2, id) + } + require.NoError(t, rows.Err()) + require.Empty(t, ids2, "no more rows left") + }) } func TestSnowflake_ShouldMerge(t *testing.T) { @@ -1156,6 +1155,24 @@ func TestSnowflake_ShouldMerge(t *testing.T) { } } +func dropSchema(t *testing.T, db *sql.DB, namespace string) { + t.Helper() + t.Log("dropping schema", namespace) + + require.Eventually(t, + func() bool { + _, err := db.ExecContext(context.Background(), fmt.Sprintf(`DROP SCHEMA %q CASCADE;`, namespace)) + if err != nil { + t.Logf("error deleting schema %q: %v", namespace, err) + return false + } + return true + }, + time.Minute, + time.Second, + ) +} + func newMockUploader( t testing.TB, loadFiles []whutils.LoadFile, @@ -1184,128 +1201,3 @@ func newMockUploader( return mockUploader } - -func TestSnowflake_DeleteBy(t *testing.T) { - if _, exists := os.LookupEnv(testKey); !exists { - t.Skipf("Skipping %s as %s is not set", t.Name(), testKey) - } - namespace := testhelper.RandSchema(whutils.SNOWFLAKE) - - ctx := context.Background() - - credentials, err := getSnowflakeTestCredentials(testKey) - require.NoError(t, err) - - data := sqlconnectconfig.Snowflake{ - Account: credentials.Account, - User: credentials.User, - Role: credentials.Role, - Password: credentials.Password, - DBName: credentials.Database, - Warehouse: credentials.Warehouse, - } - - credentialsJSON, err := json.Marshal(data) - require.NoError(t, err) - - sqlConnectDB, err := sqlconnect.NewDB("snowflake", credentialsJSON) - require.NoError(t, err) - - db := sqlConnectDB.SqlDB() - require.NoError(t, db.Ping()) - - t.Cleanup(func() { - require.Eventually(t, func() bool { - if _, err := db.Exec(fmt.Sprintf(`DROP SCHEMA %q CASCADE;`, namespace)); err != nil { - t.Logf("error deleting schema: %v", err) - return false - } - return true - }, - time.Minute, - time.Second, - ) - }) - - conf := config.New() - conf.Set("Warehouse.snowflake.enableDeleteByJobs", true) - - sf := snowflake.New(conf, logger.NOP, stats.NOP) - sf.DB = sqlquerywrapper.New(db) - sf.Namespace = namespace - - now := time.Now() - - _, err = sf.DB.ExecContext(ctx, 
fmt.Sprintf(`CREATE SCHEMA IF NOT EXISTS %s`, namespace)) - require.NoError(t, err, "should create schema") - - _, err = sf.DB.ExecContext(ctx, "CREATE TABLE "+namespace+".TEST_TABLE (id INT, context_sources_job_run_id STRING, context_sources_task_run_id STRING, context_source_id STRING, received_at DATETIME)") - require.NoError(t, err, "should create table") - - _, err = sf.DB.ExecContext(ctx, "INSERT INTO "+namespace+".TEST_TABLE VALUES (1, 'job_run_id_2', 'task_run_id_1_2', 'source_id_1', ?)", now.Add(-time.Hour)) - require.NoError(t, err, "should insert records") - _, err = sf.DB.ExecContext(ctx, "INSERT INTO "+namespace+".TEST_TABLE VALUES (2, 'job_run_id_2', 'task_run_id_1', 'source_id_2', ?)", now.Add(-time.Hour)) - require.NoError(t, err, "should insert records") - - require.NoError(t, sf.DeleteBy(ctx, []string{"TEST_TABLE"}, whutils.DeleteByParams{ - SourceId: "source_id_1", - JobRunId: "new_job_run_id", - TaskRunId: "new_task_job_run_id", - StartTime: now, - }), "should delete records") - - rows, err := sf.DB.QueryContext(ctx, "SELECT id FROM "+namespace+".TEST_TABLE") - require.NoError(t, err, "should see a successful query for ids") - - var recordIDs []int - for rows.Next() { - var id int - err := rows.Scan(&id) - require.NoError(t, err, "should scan rows") - - recordIDs = append(recordIDs, id) - } - require.NoError(t, rows.Err()) - require.Equal(t, []int{2}, recordIDs, "got the correct set of ids after deletion") - - require.NoError(t, sf.DeleteBy(ctx, []string{"TEST_TABLE"}, whutils.DeleteByParams{ - SourceId: "source_id_2", - JobRunId: "new_job_run_id", - TaskRunId: "new_task_job_run_id", - StartTime: time.Time{}, - }), "delete should succeed even if start time is zero value - no records must be deleted") - - rows, err = sf.DB.QueryContext(ctx, "SELECT id FROM "+namespace+".TEST_TABLE") - require.NoError(t, err, "should see a successful query for ids") - - var ids1 []int - for rows.Next() { - var id int - err := rows.Scan(&id) - require.NoError(t, err, "should scan rows") - - ids1 = append(ids1, id) - } - require.NoError(t, rows.Err()) - require.Equal(t, []int{2}, ids1, "got the same set of ids after deletion") - - require.NoError(t, sf.DeleteBy(ctx, []string{"TEST_TABLE"}, whutils.DeleteByParams{ - SourceId: "source_id_2", - JobRunId: "new_job_run_id", - TaskRunId: "new_task_job_run_id", - StartTime: now, - }), "should delete records") - - rows, err = sf.DB.QueryContext(ctx, "SELECT id FROM "+namespace+".TEST_TABLE") - require.NoError(t, err, "should see a successful query for ids") - var ids2 []int - for rows.Next() { - var id int - err := rows.Scan(&id) - require.NoError(t, err, "should scan rows") - - ids2 = append(ids2, id) - } - require.NoError(t, rows.Err()) - require.Empty(t, ids2, "no more rows left") -} diff --git a/warehouse/integrations/snowflake/testdata/template.json b/warehouse/integrations/snowflake/testdata/template.json deleted file mode 100644 index 2e56609fa1f..00000000000 --- a/warehouse/integrations/snowflake/testdata/template.json +++ /dev/null @@ -1,782 +0,0 @@ -{ - "enableMetrics": false, - "workspaceId": "{{.workspaceID}}", - "sources": [ - { - "config": { - "isSampleSource": true, - "eventUpload": false, - "eventUploadTS": 1646073666353 - }, - "liveEventsConfig": { - "eventUpload": false, - "eventUploadTS": 1646073666353 - }, - "id": "{{.sourceID}}", - "name": "snowflake-integration", - "writeKey": "{{.writeKey}}", - "enabled": true, - "sourceDefinitionId": "1dCzCUAtpWDzNxgGUYzq9sZdZZB", - "createdBy": "24p1CMAkx18KwNbFDXlR7sUhqaa", - 
"workspaceId": "{{.workspaceID}}", - "deleted": false, - "createdAt": "2022-02-08T09:30:27.073Z", - "updatedAt": "2022-02-28T18:41:06.362Z", - "destinations": [ - { - "config": { - {{.preferAppend}} - "account": "{{.account}}", - "database": "{{.database}}", - "warehouse": "{{.warehouse}}", - "user": "{{.user}}", - "password": "{{.password}}", - "cloudProvider": "AWS", - "bucketName": "{{.bucketName}}", - "storageIntegration": "", - "accessKeyID": "{{.accessKeyID}}", - "accessKey": "{{.accessKey}}", - "namespace": "{{.namespace}}", - "prefix": "snowflake-prefix", - "syncFrequency": "30", - "enableSSE": false, - "useRudderStorage": false - }, - "liveEventsConfig": {}, - "secretConfig": {}, - "id": "{{.destinationID}}", - "name": "snowflake-demo", - "enabled": true, - "workspaceId": "{{.workspaceID}}", - "deleted": false, - "createdAt": "2022-02-08T23:19:58.278Z", - "updatedAt": "2022-05-17T08:18:33.587Z", - "revisionId": "{{.destinationID}}", - "transformations": [], - "destinationDefinition": { - "config": { - "destConfig": { - "defaultConfig": [ - "account", - "database", - "warehouse", - "user", - "password", - "cloudProvider", - "bucketName", - "containerName", - "storageIntegration", - "accessKeyID", - "accessKey", - "accountName", - "accountKey", - "credentials", - "namespace", - "prefix", - "syncFrequency", - "syncStartAt", - "enableSSE", - "excludeWindow", - "useRudderStorage" - ] - }, - "secretKeys": [ - "password", - "accessKeyID", - "accessKey" - ], - "excludeKeys": [], - "includeKeys": [], - "transformAt": "processor", - "transformAtV1": "processor", - "supportedSourceTypes": [ - "android", - "ios", - "web", - "unity", - "amp", - "cloud", - "reactnative", - "cloudSource", - "flutter", - "cordova" - ], - "saveDestinationResponse": true - }, - "responseRules": null, - "options": null, - "id": "1XjvXnzw34UMAz1YOuKqL1kwzh6", - "name": "SNOWFLAKE", - "displayName": "Snowflake", - "category": "warehouse", - "createdAt": "2020-02-13T05:39:20.184Z", - "updatedAt": "2022-02-08T06:46:45.432Z" - }, - "isConnectionEnabled": true, - "isProcessorEnabled": true - } - ], - "sourceDefinition": { - "options": null, - "id": "1dCzCUAtpWDzNxgGUYzq9sZdZZB", - "name": "HTTP", - "displayName": "HTTP", - "category": "", - "createdAt": "2020-06-12T06:35:35.962Z", - "updatedAt": "2020-06-12T06:35:35.962Z" - }, - "dgSourceTrackingPlanConfig": null - }, - { - "config": { - "isSampleSource": true, - "eventUpload": false, - "eventUploadTS": 1646073666353 - }, - "liveEventsConfig": { - "eventUpload": false, - "eventUploadTS": 1646073666353 - }, - "id": "{{.caseSensitiveSourceID}}", - "name": "snowflake-case-sensitive-integration", - "writeKey": "{{.caseSensitiveWriteKey}}", - "enabled": true, - "sourceDefinitionId": "1dCzCUAtpWDzNxgGUYzq9sZdZZB", - "createdBy": "24p1CMAkx18KwNbFDXlR7sUhqaa", - "workspaceId": "{{.workspaceID}}", - "deleted": false, - "createdAt": "2022-02-08T09:30:27.073Z", - "updatedAt": "2022-02-28T18:41:06.362Z", - "destinations": [ - { - "config": { - {{.preferAppend}} - "account": "{{.account}}", - "database": "{{.caseSensitiveDatabase}}", - "warehouse": "{{.warehouse}}", - "user": "{{.user}}", - "password": "{{.password}}", - "cloudProvider": "AWS", - "bucketName": "{{.bucketName}}", - "storageIntegration": "", - "accessKeyID": "{{.accessKeyID}}", - "accessKey": "{{.accessKey}}", - "namespace": "{{.caseSensitiveNamespace}}", - "prefix": "snowflake-prefix", - "syncFrequency": "30", - "enableSSE": false, - "useRudderStorage": false - }, - "liveEventsConfig": {}, - "secretConfig": {}, - 
"id": "{{.caseSensitiveDestinationID}}", - "name": "snowflake-case-sensitive-demo", - "enabled": true, - "workspaceId": "{{.workspaceID}}", - "deleted": false, - "createdAt": "2022-02-08T23:19:58.278Z", - "updatedAt": "2022-05-17T08:18:33.587Z", - "revisionId": "{{.caseSensitiveDestinationID}}", - "transformations": [], - "destinationDefinition": { - "config": { - "destConfig": { - "defaultConfig": [ - "account", - "database", - "warehouse", - "user", - "password", - "cloudProvider", - "bucketName", - "containerName", - "storageIntegration", - "accessKeyID", - "accessKey", - "accountName", - "accountKey", - "credentials", - "namespace", - "prefix", - "syncFrequency", - "syncStartAt", - "enableSSE", - "excludeWindow", - "useRudderStorage" - ] - }, - "secretKeys": [ - "password", - "accessKeyID", - "accessKey" - ], - "excludeKeys": [], - "includeKeys": [], - "transformAt": "processor", - "transformAtV1": "processor", - "supportedSourceTypes": [ - "android", - "ios", - "web", - "unity", - "amp", - "cloud", - "reactnative", - "cloudSource", - "flutter", - "cordova" - ], - "saveDestinationResponse": true - }, - "responseRules": null, - "options": null, - "id": "1XjvXnzw34UBNA1YOuKqL1kwzh6", - "name": "SNOWFLAKE", - "displayName": "Snowflake", - "category": "warehouse", - "createdAt": "2020-02-13T05:39:20.184Z", - "updatedAt": "2022-02-08T06:46:45.432Z" - }, - "isConnectionEnabled": true, - "isProcessorEnabled": true - } - ], - "sourceDefinition": { - "options": null, - "id": "1dCzCUAtpWDzNxgGUYzq9sZdZZB", - "name": "HTTP", - "displayName": "HTTP", - "category": "", - "createdAt": "2020-06-12T06:35:35.962Z", - "updatedAt": "2020-06-12T06:35:35.962Z" - }, - "dgSourceTrackingPlanConfig": null - }, - { - "config": { - "isSampleSource": true, - "eventUpload": false, - "eventUploadTS": 1646073666353 - }, - "liveEventsConfig": { - "eventUpload": false, - "eventUploadTS": 1646073666353 - }, - "id": "{{.keypairEncryptedSourceID}}", - "name": "snowflake-integration", - "writeKey": "{{.keypairEncryptedWriteKey}}", - "enabled": true, - "sourceDefinitionId": "1dCzCUAtpWDzNxgGUYzq9sZdZZB", - "createdBy": "24p1CMAkx18KwNbFDXlR7sUhqaa", - "workspaceId": "{{.workspaceID}}", - "deleted": false, - "createdAt": "2022-02-08T09:30:27.073Z", - "updatedAt": "2022-02-28T18:41:06.362Z", - "destinations": [ - { - "config": { - {{.preferAppend}} - "account": "{{.account}}", - "database": "{{.database}}", - "warehouse": "{{.warehouse}}", - "user": "{{.keypairEncryptedUser}}", - "cloudProvider": "AWS", - "bucketName": "{{.bucketName}}", - "storageIntegration": "", - "accessKeyID": "{{.accessKeyID}}", - "accessKey": "{{.accessKey}}", - "namespace": "{{.keypairEncryptedNamespace}}", - "prefix": "snowflake-prefix", - "syncFrequency": "30", - "enableSSE": false, - "useKeyPairAuth": true, - "privateKey": "{{.keypairEncryptedPrivateKey}}", - "privateKeyPassphrase": "{{.keypairEncryptedPassphrase}}", - "useRudderStorage": false - }, - "liveEventsConfig": {}, - "secretConfig": {}, - "id": "{{.keypairEncryptedDestinationID}}", - "name": "snowflake-demo", - "enabled": true, - "workspaceId": "{{.workspaceID}}", - "deleted": false, - "createdAt": "2022-02-08T23:19:58.278Z", - "updatedAt": "2022-05-17T08:18:33.587Z", - "revisionId": "{{.keypairEncryptedDestinationID}}", - "transformations": [], - "destinationDefinition": { - "config": { - "destConfig": { - "defaultConfig": [ - "account", - "database", - "warehouse", - "user", - "password", - "cloudProvider", - "bucketName", - "containerName", - "storageIntegration", - "accessKeyID", - 
"accessKey", - "accountName", - "accountKey", - "credentials", - "namespace", - "prefix", - "syncFrequency", - "syncStartAt", - "enableSSE", - "excludeWindow", - "useRudderStorage" - ] - }, - "secretKeys": [ - "password", - "accessKeyID", - "accessKey" - ], - "excludeKeys": [], - "includeKeys": [], - "transformAt": "processor", - "transformAtV1": "processor", - "supportedSourceTypes": [ - "android", - "ios", - "web", - "unity", - "amp", - "cloud", - "reactnative", - "cloudSource", - "flutter", - "cordova" - ], - "saveDestinationResponse": true - }, - "responseRules": null, - "options": null, - "id": "1XjvXnzw34UMAz1YOuKqL1kwzh6", - "name": "SNOWFLAKE", - "displayName": "Snowflake", - "category": "warehouse", - "createdAt": "2020-02-13T05:39:20.184Z", - "updatedAt": "2022-02-08T06:46:45.432Z" - }, - "isConnectionEnabled": true, - "isProcessorEnabled": true - } - ], - "sourceDefinition": { - "options": null, - "id": "1dCzCUAtpWDzNxgGUYzq9sZdZZB", - "name": "HTTP", - "displayName": "HTTP", - "category": "", - "createdAt": "2020-06-12T06:35:35.962Z", - "updatedAt": "2020-06-12T06:35:35.962Z" - }, - "dgSourceTrackingPlanConfig": null - }, - { - "config": { - "isSampleSource": true, - "eventUpload": false, - "eventUploadTS": 1646073666353 - }, - "liveEventsConfig": { - "eventUpload": false, - "eventUploadTS": 1646073666353 - }, - "id": "{{.keypairUnencryptedSourceID}}", - "name": "snowflake-integration", - "writeKey": "{{.keypairUnencryptedWriteKey}}", - "enabled": true, - "sourceDefinitionId": "1dCzCUAtpWDzNxgGUYzq9sZdZZB", - "createdBy": "24p1CMAkx18KwNbFDXlR7sUhqaa", - "workspaceId": "{{.workspaceID}}", - "deleted": false, - "createdAt": "2022-02-08T09:30:27.073Z", - "updatedAt": "2022-02-28T18:41:06.362Z", - "destinations": [ - { - "config": { - {{.preferAppend}} - "account": "{{.account}}", - "database": "{{.database}}", - "warehouse": "{{.warehouse}}", - "user": "{{.keypairUnencryptedUser}}", - "cloudProvider": "AWS", - "bucketName": "{{.bucketName}}", - "storageIntegration": "", - "accessKeyID": "{{.accessKeyID}}", - "accessKey": "{{.accessKey}}", - "namespace": "{{.keypairUnencryptedNamespace}}", - "prefix": "snowflake-prefix", - "syncFrequency": "30", - "enableSSE": false, - "useKeyPairAuth": true, - "privateKey": "{{.keypairUnencryptedPrivateKey}}", - "useRudderStorage": false - }, - "liveEventsConfig": {}, - "secretConfig": {}, - "id": "{{.keypairUnencryptedDestinationID}}", - "name": "snowflake-demo", - "enabled": true, - "workspaceId": "{{.workspaceID}}", - "deleted": false, - "createdAt": "2022-02-08T23:19:58.278Z", - "updatedAt": "2022-05-17T08:18:33.587Z", - "revisionId": "{{.keypairUnencryptedDestinationID}}", - "transformations": [], - "destinationDefinition": { - "config": { - "destConfig": { - "defaultConfig": [ - "account", - "database", - "warehouse", - "user", - "password", - "cloudProvider", - "bucketName", - "containerName", - "storageIntegration", - "accessKeyID", - "accessKey", - "accountName", - "accountKey", - "credentials", - "namespace", - "prefix", - "syncFrequency", - "syncStartAt", - "enableSSE", - "excludeWindow", - "useRudderStorage" - ] - }, - "secretKeys": [ - "password", - "accessKeyID", - "accessKey" - ], - "excludeKeys": [], - "includeKeys": [], - "transformAt": "processor", - "transformAtV1": "processor", - "supportedSourceTypes": [ - "android", - "ios", - "web", - "unity", - "amp", - "cloud", - "reactnative", - "cloudSource", - "flutter", - "cordova" - ], - "saveDestinationResponse": true - }, - "responseRules": null, - "options": null, - "id": 
"1XjvXnzw34UMAz1YOuKqL1kwzh6", - "name": "SNOWFLAKE", - "displayName": "Snowflake", - "category": "warehouse", - "createdAt": "2020-02-13T05:39:20.184Z", - "updatedAt": "2022-02-08T06:46:45.432Z" - }, - "isConnectionEnabled": true, - "isProcessorEnabled": true - } - ], - "sourceDefinition": { - "options": null, - "id": "1dCzCUAtpWDzNxgGUYzq9sZdZZB", - "name": "HTTP", - "displayName": "HTTP", - "category": "", - "createdAt": "2020-06-12T06:35:35.962Z", - "updatedAt": "2020-06-12T06:35:35.962Z" - }, - "dgSourceTrackingPlanConfig": null - }, - { - "config": { - "isSampleSource": true, - "eventUpload": false, - "eventUploadTS": 1646073666353 - }, - "liveEventsConfig": { - "eventUpload": false, - "eventUploadTS": 1646073666353 - }, - "id": "{{.rbacSourceID}}", - "name": "snowflake-rbac-integration", - "writeKey": "{{.rbacWriteKey}}", - "enabled": true, - "sourceDefinitionId": "1dCzCUAtpWDzNxgGUYzq9sZdZZB", - "createdBy": "24p1CMAkx18KwNbFDXlR7sUhqaa", - "workspaceId": "{{.workspaceID}}", - "deleted": false, - "createdAt": "2022-02-08T09:30:27.073Z", - "updatedAt": "2022-02-28T18:41:06.362Z", - "destinations": [ - { - "config": { - {{.preferAppend}} - "account": "{{.rbacAccount}}", - "database": "{{.rbacDatabase}}", - "warehouse": "{{.rbacWarehouse}}", - "user": "{{.rbacUser}}", - "role": "{{.rbacRole}}", - "password": "{{.rbacPassword}}", - "cloudProvider": "AWS", - "bucketName": "{{.rbacBucketName}}", - "storageIntegration": "", - "accessKeyID": "{{.rbacAccessKeyID}}", - "accessKey": "{{.rbacAccessKey}}", - "namespace": "{{.rbacNamespace}}", - "prefix": "snowflake-prefix", - "syncFrequency": "30", - "enableSSE": false, - "useRudderStorage": false - }, - "liveEventsConfig": {}, - "secretConfig": {}, - "id": "{{.rbacDestinationID}}", - "name": "snowflake-rbac-demo", - "enabled": true, - "workspaceId": "{{.workspaceID}}", - "deleted": false, - "createdAt": "2022-02-08T23:19:58.278Z", - "updatedAt": "2022-05-17T08:18:33.587Z", - "revisionId": "{{.rbacDestinationID}}", - "transformations": [], - "destinationDefinition": { - "config": { - "destConfig": { - "defaultConfig": [ - "account", - "database", - "warehouse", - "user", - "password", - "cloudProvider", - "bucketName", - "containerName", - "storageIntegration", - "accessKeyID", - "accessKey", - "accountName", - "accountKey", - "credentials", - "namespace", - "prefix", - "syncFrequency", - "syncStartAt", - "enableSSE", - "excludeWindow", - "useRudderStorage" - ] - }, - "secretKeys": [ - "password", - "accessKeyID", - "accessKey" - ], - "excludeKeys": [], - "includeKeys": [], - "transformAt": "processor", - "transformAtV1": "processor", - "supportedSourceTypes": [ - "android", - "ios", - "web", - "unity", - "amp", - "cloud", - "reactnative", - "cloudSource", - "flutter", - "cordova" - ], - "saveDestinationResponse": true - }, - "responseRules": null, - "options": null, - "id": "1XjvXnzw34KjA1YOuKqL1kwzh6", - "name": "SNOWFLAKE", - "displayName": "Snowflake", - "category": "warehouse", - "createdAt": "2020-02-13T05:39:20.184Z", - "updatedAt": "2022-02-08T06:46:45.432Z" - }, - "isConnectionEnabled": true, - "isProcessorEnabled": true - } - ], - "sourceDefinition": { - "options": null, - "id": "1dCzCUAtpWDzNxgGUYzq9sZdZZB", - "name": "HTTP", - "displayName": "HTTP", - "category": "", - "createdAt": "2020-06-12T06:35:35.962Z", - "updatedAt": "2020-06-12T06:35:35.962Z" - }, - "dgSourceTrackingPlanConfig": null - }, - { - "config": { - "config": { - "row_batch_size": 200, - "credentials": { - "auth_type": "Client", - "accountId": 
"29hOyXzmdF9rz7yR2FTq4pohyXL" - }, - "spreadsheet_id": "1bKQpN-KkhYZd4eqUUoq3Tec6HrJzgqSc8jwVvajnpk8" - }, - "schedule": { - "type": "manual", - "every": 0, - "unit": "minutes" - }, - "prefix": "SGS5" - }, - "liveEventsConfig": {}, - "id": "{{.sourcesSourceID}}", - "name": "snowflake-sources-integration", - "writeKey": "{{.sourcesWriteKey}}", - "enabled": true, - "sourceDefinitionId": "29seNpaVfhMp7YVpiBUszPOvmO1", - "createdBy": "279BPpjT6BGqKKhT5qAZuUVZa1h", - "workspaceId": "{{.workspaceID}}", - "deleted": false, - "transient": false, - "secretVersion": null, - "createdAt": "2022-08-23T00:21:18.366Z", - "updatedAt": "2022-08-23T00:21:18.366Z", - "sourceDefinition": { - "options": { - "auth": { - "provider": "Google", - "oauthRole": "google_sheets" - }, - "image": "source-google-sheets:v2", - "isBeta": true - }, - "id": "29seNpaVfhMp7YVpiBUszPOvmO1", - "name": "singer-google-sheets", - "displayName": "Singer Google Sheets", - "category": "singer-protocol", - "createdAt": "2022-05-30T04:53:02.188Z", - "updatedAt": "2022-05-30T04:53:02.188Z" - }, - "destinations": [ - { - "config": { - {{.preferAppend}} - "account": "{{.account}}", - "database": "{{.database}}", - "warehouse": "{{.warehouse}}", - "user": "{{.user}}", - "password": "{{.password}}", - "cloudProvider": "AWS", - "bucketName": "{{.bucketName}}", - "storageIntegration": "", - "accessKeyID": "{{.accessKeyID}}", - "accessKey": "{{.accessKey}}", - "namespace": "{{.sourcesNamespace}}", - "prefix": "snowflake-prefix", - "syncFrequency": "30", - "enableSSE": false, - "useRudderStorage": false - }, - "liveEventsConfig": {}, - "secretConfig": {}, - "id": "{{.sourcesDestinationID}}", - "name": "snowflake-sources-demo", - "enabled": true, - "workspaceId": "{{.workspaceID}}", - "deleted": false, - "createdAt": "2022-02-08T23:19:58.278Z", - "updatedAt": "2022-05-17T08:18:33.587Z", - "revisionId": "{{.sourcesDestinationID}}", - "transformations": [], - "destinationDefinition": { - "config": { - "destConfig": { - "defaultConfig": [ - "account", - "database", - "warehouse", - "user", - "password", - "cloudProvider", - "bucketName", - "containerName", - "storageIntegration", - "accessKeyID", - "accessKey", - "accountName", - "accountKey", - "credentials", - "namespace", - "prefix", - "syncFrequency", - "syncStartAt", - "enableSSE", - "excludeWindow", - "useRudderStorage" - ] - }, - "secretKeys": [ - "password", - "accessKeyID", - "accessKey" - ], - "excludeKeys": [], - "includeKeys": [], - "transformAt": "processor", - "transformAtV1": "processor", - "supportedSourceTypes": [ - "android", - "ios", - "web", - "unity", - "amp", - "cloud", - "reactnative", - "cloudSource", - "flutter", - "cordova" - ], - "saveDestinationResponse": true - }, - "responseRules": null, - "options": null, - "id": "1XjvXnzw34UMAz1YOuKqL1kwzh6", - "name": "SNOWFLAKE", - "displayName": "Snowflake", - "category": "warehouse", - "createdAt": "2020-02-13T05:39:20.184Z", - "updatedAt": "2022-02-08T06:46:45.432Z" - }, - "isConnectionEnabled": true, - "isProcessorEnabled": true - } - ], - "dgSourceTrackingPlanConfig": null - } - ], - "libraries": [ - { - "versionId": "23Uxw7QEiOg8e0KkQV8LmNfWaWh" - } - ] -} diff --git a/warehouse/integrations/testhelper/service.go b/warehouse/integrations/testhelper/service.go new file mode 100644 index 00000000000..3c92ba941e1 --- /dev/null +++ b/warehouse/integrations/testhelper/service.go @@ -0,0 +1,84 @@ +package testhelper + +import ( + "context" + "fmt" + "strconv" + "testing" + "time" + + backendconfig 
"github.com/rudderlabs/rudder-server/backend-config" + "github.com/rudderlabs/rudder-server/runner" + "github.com/rudderlabs/rudder-server/testhelper/backendconfigtest" + "github.com/rudderlabs/rudder-server/testhelper/health" +) + +func BootstrapSvc(t *testing.T, workspaceConfig backendconfig.ConfigT, httpPort, jobsDBPort int) { + bcServer := backendconfigtest. + NewBuilder(). + WithWorkspaceConfig(workspaceConfig). + Build() + t.Cleanup(func() { + bcServer.Close() + }) + + enhanceWithDefaultEnvs(t) + + t.Setenv("JOBS_DB_PORT", strconv.Itoa(jobsDBPort)) + t.Setenv("WAREHOUSE_JOBS_DB_PORT", strconv.Itoa(jobsDBPort)) + t.Setenv("RSERVER_WAREHOUSE_WEB_PORT", strconv.Itoa(httpPort)) + t.Setenv("WORKSPACE_TOKEN", "token") + t.Setenv("CONFIG_BACKEND_URL", bcServer.URL) + + svcDone := make(chan struct{}) + ctx, cancel := context.WithCancel(context.Background()) + + go func() { + r := runner.New(runner.ReleaseInfo{EnterpriseToken: "TOKEN"}) + _ = r.Run(ctx, []string{"integration-test"}) + close(svcDone) + }() + + t.Cleanup(func() { <-svcDone }) + t.Cleanup(cancel) + + serviceHealthEndpoint := fmt.Sprintf("http://localhost:%d/health", httpPort) + health.WaitUntilReady(ctx, t, + serviceHealthEndpoint, time.Minute, time.Second, "serviceHealthEndpoint", + ) +} + +func enhanceWithDefaultEnvs(t testing.TB) { + t.Setenv("JOBS_DB_HOST", jobsDBHost) + t.Setenv("JOBS_DB_NAME", jobsDBDatabase) + t.Setenv("JOBS_DB_DB_NAME", jobsDBDatabase) + t.Setenv("JOBS_DB_USER", jobsDBUser) + t.Setenv("JOBS_DB_PASSWORD", jobsDBPassword) + t.Setenv("JOBS_DB_SSL_MODE", "disable") + t.Setenv("WAREHOUSE_JOBS_DB_HOST", jobsDBHost) + t.Setenv("WAREHOUSE_JOBS_DB_NAME", jobsDBDatabase) + t.Setenv("WAREHOUSE_JOBS_DB_DB_NAME", jobsDBDatabase) + t.Setenv("WAREHOUSE_JOBS_DB_USER", jobsDBUser) + t.Setenv("WAREHOUSE_JOBS_DB_PASSWORD", jobsDBPassword) + t.Setenv("WAREHOUSE_JOBS_DB_SSL_MODE", "disable") + t.Setenv("GO_ENV", "production") + t.Setenv("LOG_LEVEL", "INFO") + t.Setenv("INSTANCE_ID", "1") + t.Setenv("ALERT_PROVIDER", "pagerduty") + t.Setenv("CONFIG_PATH", "../../../config/config.yaml") + t.Setenv("RSERVER_WAREHOUSE_WAREHOUSE_SYNC_FREQ_IGNORE", "true") + t.Setenv("RSERVER_WAREHOUSE_UPLOAD_FREQ_IN_S", "10") + t.Setenv("RSERVER_WAREHOUSE_ENABLE_JITTER_FOR_SYNCS", "false") + t.Setenv("RSERVER_WAREHOUSE_ENABLE_IDRESOLUTION", "true") + t.Setenv("RSERVER_BACKEND_CONFIG_CONFIG_FROM_FILE", "false") + t.Setenv("RSERVER_ADMIN_SERVER_ENABLED", "false") + t.Setenv("RUDDER_ADMIN_PASSWORD", "password") + t.Setenv("RUDDER_GRACEFUL_SHUTDOWN_TIMEOUT_EXIT", "false") + t.Setenv("RSERVER_LOGGER_CONSOLE_JSON_FORMAT", "true") + t.Setenv("RSERVER_WAREHOUSE_MODE", "master_and_slave") + t.Setenv("RSERVER_ENABLE_STATS", "false") + t.Setenv("RUDDER_TMPDIR", t.TempDir()) + if testing.Verbose() { + t.Setenv("LOG_LEVEL", "DEBUG") + } +} diff --git a/warehouse/integrations/testhelper/setup.go b/warehouse/integrations/testhelper/setup.go index 1519cee36a0..e08ef9fec21 100644 --- a/warehouse/integrations/testhelper/setup.go +++ b/warehouse/integrations/testhelper/setup.go @@ -217,40 +217,6 @@ func WithConstantRetries(operation func() error) error { return err } -func EnhanceWithDefaultEnvs(t testing.TB) { - t.Setenv("JOBS_DB_HOST", jobsDBHost) - t.Setenv("JOBS_DB_NAME", jobsDBDatabase) - t.Setenv("JOBS_DB_DB_NAME", jobsDBDatabase) - t.Setenv("JOBS_DB_USER", jobsDBUser) - t.Setenv("JOBS_DB_PASSWORD", jobsDBPassword) - t.Setenv("JOBS_DB_SSL_MODE", "disable") - t.Setenv("WAREHOUSE_JOBS_DB_HOST", jobsDBHost) - t.Setenv("WAREHOUSE_JOBS_DB_NAME", jobsDBDatabase) - 
t.Setenv("WAREHOUSE_JOBS_DB_DB_NAME", jobsDBDatabase) - t.Setenv("WAREHOUSE_JOBS_DB_USER", jobsDBUser) - t.Setenv("WAREHOUSE_JOBS_DB_PASSWORD", jobsDBPassword) - t.Setenv("WAREHOUSE_JOBS_DB_SSL_MODE", "disable") - t.Setenv("GO_ENV", "production") - t.Setenv("LOG_LEVEL", "INFO") - t.Setenv("INSTANCE_ID", "1") - t.Setenv("ALERT_PROVIDER", "pagerduty") - t.Setenv("CONFIG_PATH", "../../../config/config.yaml") - t.Setenv("RSERVER_WAREHOUSE_WAREHOUSE_SYNC_FREQ_IGNORE", "true") - t.Setenv("RSERVER_WAREHOUSE_UPLOAD_FREQ_IN_S", "10") - t.Setenv("RSERVER_WAREHOUSE_ENABLE_JITTER_FOR_SYNCS", "false") - t.Setenv("RSERVER_WAREHOUSE_ENABLE_IDRESOLUTION", "true") - t.Setenv("RSERVER_BACKEND_CONFIG_CONFIG_FROM_FILE", "true") - t.Setenv("RUDDER_ADMIN_PASSWORD", "password") - t.Setenv("RUDDER_GRACEFUL_SHUTDOWN_TIMEOUT_EXIT", "false") - t.Setenv("RSERVER_LOGGER_CONSOLE_JSON_FORMAT", "true") - t.Setenv("RSERVER_WAREHOUSE_MODE", "master_and_slave") - t.Setenv("RSERVER_ENABLE_STATS", "false") - t.Setenv("RUDDER_TMPDIR", t.TempDir()) - if testing.Verbose() { - t.Setenv("LOG_LEVEL", "DEBUG") - } -} - func UploadLoadFile( t testing.TB, fm filemanager.FileManager,