diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index 7f43280425..7ab2bc96f2 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -95,6 +95,7 @@ jobs: - source-sqlserver - source-test - source-azure-blob-storage + - materialize-azure-fabric-warehouse - materialize-bigquery - materialize-databricks - materialize-dynamodb @@ -277,6 +278,7 @@ jobs: "materialize-motherduck", "materialize-snowflake", "materialize-databricks", + "materialize-azure-fabric-warehouse", "materialize-bigquery", "materialize-redshift", "materialize-s3-iceberg" diff --git a/filesink/format.go b/filesink/format.go index d45d28bf0f..bb67056f0c 100644 --- a/filesink/format.go +++ b/filesink/format.go @@ -45,28 +45,16 @@ func NewParquetStreamEncoder(cfg ParquetConfig, b *pf.MaterializationSpec_Bindin } type CsvConfig struct { - Delimiter string `json:"delimiter,omitempty" jsonschema:"title=Delimiter,description=Character to separate columns within a row. Defaults to a comma if blank. Must be a single character with a byte length of 1." jsonschema_extras:"order=0"` - NullString string `json:"nullString,omitempty" jsonschema:"title=Null String,description=String to use to represent NULL values. Defaults to an empty string if blank." jsonschema_extras:"order=1"` - SkipHeaders bool `json:"skipHeaders,omitempty" jsonschema:"title=Skip Headers,description=Do not write headers to files." jsonschema_extras:"order=2"` + SkipHeaders bool `json:"skipHeaders,omitempty" jsonschema:"title=Skip Headers,description=Do not write headers to files." jsonschema_extras:"order=2"` } func (c CsvConfig) Validate() error { - if r := []rune(c.Delimiter); len(r) > 1 { - return fmt.Errorf("delimiter %q must be a single rune (byte length of 1): got byte length of %d", c.Delimiter, len(r)) - } - return nil } func NewCsvStreamEncoder(cfg CsvConfig, b *pf.MaterializationSpec_Binding, w io.WriteCloser) StreamEncoder { var opts []enc.CsvOption - if cfg.Delimiter != "" { - opts = append(opts, enc.WithCsvDelimiter([]rune(cfg.Delimiter)[0])) // already validated to be 1 byte in length - } - if cfg.NullString != "" { - opts = append(opts, enc.WithCsvNullString(cfg.NullString)) - } if cfg.SkipHeaders { opts = append(opts, enc.WithCsvSkipHeaders()) } diff --git a/go.mod b/go.mod index 5a1f5285ca..4a2bd86237 100644 --- a/go.mod +++ b/go.mod @@ -51,7 +51,7 @@ require ( github.com/klauspost/compress v1.17.9 github.com/marcboeker/go-duckdb v1.8.0 github.com/mattn/go-sqlite3 v2.0.3+incompatible - github.com/microsoft/go-mssqldb v0.21.0 + github.com/microsoft/go-mssqldb v1.8.0 github.com/minio/highwayhash v1.0.2 github.com/mitchellh/mapstructure v1.5.0 github.com/pinecone-io/go-pinecone v1.1.1 @@ -104,7 +104,7 @@ require ( github.com/99designs/go-keychain v0.0.0-20191008050251-8e49817e8af4 // indirect github.com/99designs/keyring v1.2.2 // indirect github.com/Azure/azure-sdk-for-go/sdk/internal v1.8.0 // indirect - github.com/AzureAD/microsoft-authentication-library-for-go v1.2.2 // indirect + github.com/AzureAD/microsoft-authentication-library-for-go v1.3.2 // indirect github.com/DataDog/appsec-internal-go v1.6.0 // indirect github.com/DataDog/datadog-agent/pkg/obfuscate v0.54.0 // indirect github.com/DataDog/datadog-agent/pkg/remoteconfig/state v0.54.0 // indirect diff --git a/go.sum b/go.sum index b429f9f001..30883b173f 100644 --- a/go.sum +++ b/go.sum @@ -79,16 +79,16 @@ github.com/99designs/keyring v1.2.2 h1:pZd3neh/EmUzWONb35LxQfvuY7kiSXAq3HQd97+XB github.com/99designs/keyring v1.2.2/go.mod 
h1:wes/FrByc8j7lFOAGLGSNEg8f/PaI3cgTBqhFkHUrPk= github.com/Azure/azure-pipeline-go v0.2.3 h1:7U9HBg1JFK3jHl5qmo4CTZKFTVgMwdFHMVtCdfBE21U= github.com/Azure/azure-pipeline-go v0.2.3/go.mod h1:x841ezTBIMG6O3lAcl8ATHnsOPVl2bqk7S3ta6S6u4k= -github.com/Azure/azure-sdk-for-go/sdk/azcore v1.0.0/go.mod h1:uGG2W01BaETf0Ozp+QxxKJdMBNRWPdstHG0Fmdwn1/U= -github.com/Azure/azure-sdk-for-go/sdk/azcore v1.1.2/go.mod h1:uGG2W01BaETf0Ozp+QxxKJdMBNRWPdstHG0Fmdwn1/U= github.com/Azure/azure-sdk-for-go/sdk/azcore v1.11.1 h1:E+OJmp2tPvt1W+amx48v1eqbjDYsgN+RzP4q16yV5eM= github.com/Azure/azure-sdk-for-go/sdk/azcore v1.11.1/go.mod h1:a6xsAQUZg+VsS3TJ05SRp524Hs4pZ/AeFSr5ENf0Yjo= -github.com/Azure/azure-sdk-for-go/sdk/azidentity v1.2.1/go.mod h1:gLa1CL2RNE4s7M3yopJ/p0iq5DdY6Yv5ZUt9MTRZOQM= github.com/Azure/azure-sdk-for-go/sdk/azidentity v1.6.0 h1:U2rTu3Ef+7w9FHKIAXM6ZyqF3UOWJZ12zIm8zECAFfg= github.com/Azure/azure-sdk-for-go/sdk/azidentity v1.6.0/go.mod h1:9kIvujWAA58nmPmWB1m23fyWic1kYZMxD9CxaWn4Qpg= -github.com/Azure/azure-sdk-for-go/sdk/internal v1.0.0/go.mod h1:eWRD7oawr1Mu1sLCawqVc0CUiF43ia3qQMxLscsKQ9w= github.com/Azure/azure-sdk-for-go/sdk/internal v1.8.0 h1:jBQA3cKT4L2rWMpgE7Yt3Hwh2aUj8KXjIGLxjHeYNNo= github.com/Azure/azure-sdk-for-go/sdk/internal v1.8.0/go.mod h1:4OG6tQ9EOP/MT0NMjDlRzWoVFxfu9rN9B2X+tlSVktg= +github.com/Azure/azure-sdk-for-go/sdk/security/keyvault/azkeys v1.0.1 h1:MyVTgWR8qd/Jw1Le0NZebGBUCLbtak3bJ3z1OlqZBpw= +github.com/Azure/azure-sdk-for-go/sdk/security/keyvault/azkeys v1.0.1/go.mod h1:GpPjLhVR9dnUoJMyHWSPy71xY9/lcmpzIPZXmF0FCVY= +github.com/Azure/azure-sdk-for-go/sdk/security/keyvault/internal v1.0.0 h1:D3occbWoio4EBLkbkevetNMAVX197GkzbUMtqjGWn80= +github.com/Azure/azure-sdk-for-go/sdk/security/keyvault/internal v1.0.0/go.mod h1:bTSOgj05NGRuHHhQwAdPnYr9TOdNmKlZTgGLL6nyAdI= github.com/Azure/azure-sdk-for-go/sdk/storage/azblob v1.0.0 h1:u/LLAOFgsMv7HmNL4Qufg58y+qElGOt5qv0z1mURkRY= github.com/Azure/azure-sdk-for-go/sdk/storage/azblob v1.0.0/go.mod h1:2e8rMJtl2+2j+HXbTBwnyGpm5Nou7KhvSfxOq8JpTag= github.com/Azure/azure-storage-blob-go v0.14.0/go.mod h1:SMqIBi+SuiQH32bvyjngEewEeXoPfKMgWlBDaYf6fck= @@ -102,9 +102,8 @@ github.com/Azure/go-autorest/autorest/date v0.3.0/go.mod h1:BI0uouVdmngYNUzGWeSY github.com/Azure/go-autorest/autorest/mocks v0.4.1/go.mod h1:LTp+uSrOhSkaKrUy935gNZuuIPPVsHlr9DSOxSayd+k= github.com/Azure/go-autorest/logger v0.2.1/go.mod h1:T9E3cAhj2VqvPOtCYAvby9aBXkZmbF5NWuPV8+WeEW8= github.com/Azure/go-autorest/tracing v0.6.0/go.mod h1:+vhtPC754Xsa23ID7GlGsrdKBpUA79WCAKPPZVC2DeU= -github.com/AzureAD/microsoft-authentication-library-for-go v0.8.1/go.mod h1:4qFor3D/HDsvBME35Xy9rwW9DecL+M2sNw1ybjPtwA0= -github.com/AzureAD/microsoft-authentication-library-for-go v1.2.2 h1:XHOnouVk1mxXfQidrMEnLlPk9UMeRtyBTnEFtxkV0kU= -github.com/AzureAD/microsoft-authentication-library-for-go v1.2.2/go.mod h1:wP83P5OoQ5p6ip3ScPr0BAq0BvuPAvacpEuSzyouqAI= +github.com/AzureAD/microsoft-authentication-library-for-go v1.3.2 h1:kYRSnvJju5gYVyhkij+RTJ/VR6QIUaCfWeaFm2ycsjQ= +github.com/AzureAD/microsoft-authentication-library-for-go v1.3.2/go.mod h1:wP83P5OoQ5p6ip3ScPr0BAq0BvuPAvacpEuSzyouqAI= github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU= github.com/BurntSushi/xgb v0.0.0-20160522181843-27f122750802/go.mod h1:IVnqGOEym/WlBOVXweHU+Q+/VP0lqqI8lqeDx9IjBqo= github.com/DataDog/appsec-internal-go v1.6.0 h1:QHvPOv/O0s2fSI/BraZJNpRDAtdlrRm5APJFZNBxjAw= @@ -329,8 +328,6 @@ github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc h1:U9qPSI2PIWSS1 github.com/davecgh/go-spew 
v1.1.2-0.20180830191138-d8f796af33cc/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/dgryski/go-farm v0.0.0-20190423205320-6a90982ecee2 h1:tdlZCpZ/P9DhczCTSixgIKmwPv6+wP5DGjqLYw5SUiA= github.com/dgryski/go-farm v0.0.0-20190423205320-6a90982ecee2/go.mod h1:SqUrOPUnsFjfmXRMNPybcSiG0BgUW2AuFH8PAnS2iTw= -github.com/dnaeon/go-vcr v1.1.0/go.mod h1:M7tiix8f0r6mKKJ3Yq/kqU1OYf3MnfmBWVbPx/yU9ko= -github.com/dnaeon/go-vcr v1.2.0/go.mod h1:R4UdLID7HZT3taECzJs4YgbbH6PIGXB6W/sc5OLb6RQ= github.com/dnephin/pflag v1.0.7 h1:oxONGlWxhmUct0YzKTgrpQv9AUA1wtPBn7zuSjJqptk= github.com/dnephin/pflag v1.0.7/go.mod h1:uxE91IoWURlOiTUIA8Mq5ZZkAv3dPUfZNaT80Zm7OQE= github.com/docker/cli v20.10.17+incompatible h1:eO2KS7ZFeov5UJeaDmIs1NFEDRf32PaqRpvoEkKBy5M= @@ -453,10 +450,8 @@ github.com/godbus/dbus/v5 v5.0.4/go.mod h1:xhWf0FNVPg57R7Z0UbKHbJfkEywrmjJnf7w5x github.com/gogo/protobuf v1.1.1/go.mod h1:r8qH/GZQm5c6nD/R0oafs1akxWv10x8SbQlK7atdtwQ= github.com/gogo/protobuf v1.3.2 h1:Ov1cvc58UF3b5XjBnZv7+opcTcQFZebYjWzi34vdm4Q= github.com/gogo/protobuf v1.3.2/go.mod h1:P1XiOD3dCwIKUDQYPy72D8LYyHL2YPYrpS2s69NZV8Q= -github.com/golang-jwt/jwt/v4 v4.4.2/go.mod h1:m21LjoU+eqJr34lmDMbreY2eSTRJ1cv77w39/MY0Ch0= github.com/golang-jwt/jwt/v5 v5.2.1 h1:OuVbFODueb089Lh128TAcimifWaLhJwVflnrgM17wHk= github.com/golang-jwt/jwt/v5 v5.2.1/go.mod h1:pqrtFR0X4osieyHYxtmOUWsAWrfe1Q5UVIyoH402zdk= -github.com/golang-sql/civil v0.0.0-20190719163853-cb61b32ac6fe/go.mod h1:8vg3r2VgvsThLBIFL93Qb5yWzgyZWhEmBwUJWevAkK0= github.com/golang-sql/civil v0.0.0-20220223132316-b832511892a9 h1:au07oEsX2xN0ktxqI+Sida1w446QrXBRJ0nee3SNZlA= github.com/golang-sql/civil v0.0.0-20220223132316-b832511892a9/go.mod h1:8vg3r2VgvsThLBIFL93Qb5yWzgyZWhEmBwUJWevAkK0= github.com/golang-sql/sqlexp v0.1.0 h1:ZCD6MBpcuOVfGVqsEmY5/4FtYiKz6tSyUv9LPEDei6A= @@ -560,10 +555,8 @@ github.com/google/s2a-go v0.1.7 h1:60BLSyTrOV4/haCDW4zb1guZItoSq8foHCXrAnjBo/o= github.com/google/s2a-go v0.1.7/go.mod h1:50CgR4k1jNlWBu4UfS4AcfhVe1r6pdZPygJ3R8F0Qdw= github.com/google/shlex v0.0.0-20191202100458-e7afc7fbc510 h1:El6M4kTTCOh6aBiKaUGG7oYTSPP8MxqL4YI3kZKwcP4= github.com/google/shlex v0.0.0-20191202100458-e7afc7fbc510/go.mod h1:pupxD2MaaD3pAXIBCelhxNneeOaAeabZDe5s4K6zSpQ= -github.com/google/uuid v1.1.1/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= github.com/google/uuid v1.1.2/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= github.com/google/uuid v1.2.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= -github.com/google/uuid v1.3.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0= github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= github.com/googleapis/enterprise-certificate-proxy v0.3.2 h1:Vie5ybvEvT75RniqhfFxPRy3Bf7vr3h0cechB90XaQs= @@ -576,8 +569,6 @@ github.com/googleapis/gax-go/v2 v2.12.4 h1:9gWcmF85Wvq4ryPFvGFaOgPIs1AQX0d0bcbGw github.com/googleapis/gax-go/v2 v2.12.4/go.mod h1:KYEYLorsnIGDi/rPC8b5TdlB9kbKoFubselGIoBMCwI= github.com/gorilla/schema v1.4.1 h1:jUg5hUjCSDZpNGLuXQOgIWGdlgrIdYvgQ0wZtdK1M3E= github.com/gorilla/schema v1.4.1/go.mod h1:Dg5SSm5PV60mhF2NFaTV1xuYYj8tV8NOPRo4FggUMnM= -github.com/gorilla/securecookie v1.1.1/go.mod h1:ra0sb63/xPlUeL+yeDciTfxMRAA+MP+HVt/4epWDjd4= -github.com/gorilla/sessions v1.2.1/go.mod h1:dk2InVEVJ0sfLlnXv9EAgkf6ecYs/i80K/zI+bUmuGM= github.com/gorilla/websocket v1.4.1/go.mod h1:YR8l580nyteQvAITg2hZ9XVh4b55+EU/adAjf1fMHhE= github.com/gorilla/websocket v1.4.2/go.mod 
h1:YR8l580nyteQvAITg2hZ9XVh4b55+EU/adAjf1fMHhE= github.com/gorilla/websocket v1.5.0 h1:PPwGk2jz7EePpoHN/+ClbZu8SPxiqlu12wZP/3sWmnc= @@ -604,7 +595,6 @@ github.com/hashicorp/go-secure-stdlib/strutil v0.1.2/go.mod h1:Gou2R9+il93BqX25L github.com/hashicorp/go-sockaddr v1.0.2 h1:ztczhD1jLxIRjVejw8gFomI1BQZOe2WoVOu0SyteCQc= github.com/hashicorp/go-sockaddr v1.0.2/go.mod h1:rB4wwRAUzs07qva3c5SdrY/NEtAUjGlgmH/UkBUC97A= github.com/hashicorp/go-uuid v0.0.0-20180228145832-27454136f036/go.mod h1:6SBZvOh/SIDV7/2o3Jml5SYk/TvGqwFJ/bN7x4byOro= -github.com/hashicorp/go-uuid v1.0.2/go.mod h1:6SBZvOh/SIDV7/2o3Jml5SYk/TvGqwFJ/bN7x4byOro= github.com/hashicorp/go-uuid v1.0.3 h1:2gKiV6YVmrJ1i2CKKa9obLvRieoRGviZFL26PcT/Co8= github.com/hashicorp/go-uuid v1.0.3/go.mod h1:6SBZvOh/SIDV7/2o3Jml5SYk/TvGqwFJ/bN7x4byOro= github.com/hashicorp/golang-lru v0.5.0/go.mod h1:/m3WP610KZHVQ1SGc6re/UDhFvYD7pJ4Ao+sR/qLZy8= @@ -643,15 +633,9 @@ github.com/jackc/pgx/v5 v5.6.0 h1:SWJzexBzPL5jb0GEsrPMLIsi/3jOo7RHlzTjcAeDrPY= github.com/jackc/pgx/v5 v5.6.0/go.mod h1:DNZ/vlrUnhWCoFGxHAG8U2ljioxukquj7utPDgtQdTw= github.com/jackc/puddle/v2 v2.2.1 h1:RhxXJtFG022u4ibrCSMSiu5aOq1i77R3OHKNJj77OAk= github.com/jackc/puddle/v2 v2.2.1/go.mod h1:vriiEXHvEE654aYKXXjOvZM39qJ0q+azkZFrfEOc3H4= -github.com/jcmturner/aescts/v2 v2.0.0/go.mod h1:AiaICIRyfYg35RUkr8yESTqvSy7csK90qZ5xfvvsoNs= -github.com/jcmturner/dnsutils/v2 v2.0.0/go.mod h1:b0TnjGOvI/n42bZa+hmXL+kFJZsFT7G4t3HTlQ184QM= github.com/jcmturner/gofork v0.0.0-20180107083740-2aebee971930/go.mod h1:MK8+TM0La+2rjBD4jE12Kj1pCCxK7d2LK/UM3ncEo0o= -github.com/jcmturner/gofork v1.0.0/go.mod h1:MK8+TM0La+2rjBD4jE12Kj1pCCxK7d2LK/UM3ncEo0o= github.com/jcmturner/gofork v1.7.6 h1:QH0l3hzAU1tfT3rZCnW5zXl+orbkNMMRGJfdJjHVETg= github.com/jcmturner/gofork v1.7.6/go.mod h1:1622LH6i/EZqLloHfE7IeZ0uEJwMSUyQ/nDd82IeqRo= -github.com/jcmturner/goidentity/v6 v6.0.1/go.mod h1:X1YW3bgtvwAXju7V3LCIMpY0Gbxyjn/mY9zx4tFonSg= -github.com/jcmturner/gokrb5/v8 v8.4.2/go.mod h1:sb+Xq/fTY5yktf/VxLsE3wlfPqQjp0aWNYyvBVK62bc= -github.com/jcmturner/rpc/v2 v2.0.3/go.mod h1:VUJYCIDm3PVOEHw8sgt091/20OJjskO/YJki3ELg/Hc= github.com/jmespath/go-jmespath v0.3.0/go.mod h1:9QtRXoHjLGCJ5IBSaohpXITPlowMeeYCZ7fLUTSywik= github.com/jmespath/go-jmespath v0.4.0 h1:BEgLn5cpjn8UN1mAw4NjwDrS35OdebyEtFe+9YPoQUg= github.com/jmespath/go-jmespath v0.4.0/go.mod h1:T8mJZnbsbmF+m6zOOFylbeCJqk5+pHWvzYPziyZiYoo= @@ -723,8 +707,8 @@ github.com/mattn/go-sqlite3 v1.14.22/go.mod h1:Uh1q+B4BYcTPb+yiD3kU8Ct7aC0hY9fxU github.com/mattn/go-sqlite3 v2.0.3+incompatible h1:gXHsfypPkaMZrKbD5209QV9jbUTJKjyR5WD3HYQSd+U= github.com/mattn/go-sqlite3 v2.0.3+incompatible/go.mod h1:FPy6KqzDD04eiIsT53CuJW3U88zkxoIYsOqkbpncsNc= github.com/matttproud/golang_protobuf_extensions v1.0.1/go.mod h1:D8He9yQNgCq6Z5Ld7szi9bcBfOoFv/3dc6xSMkL2PC0= -github.com/microsoft/go-mssqldb v0.21.0 h1:p2rpHIL7TlSv1QrbXJUAcbyRKnIT0C9rRkH2E4OjLn8= -github.com/microsoft/go-mssqldb v0.21.0/go.mod h1:+4wZTUnz/SV6nffv+RRRB/ss8jPng5Sho2SmM1l2ts4= +github.com/microsoft/go-mssqldb v1.8.0 h1:7cyZ/AT7ycDsEoWPIXibd+aVKFtteUNhDGf3aobP+tw= +github.com/microsoft/go-mssqldb v1.8.0/go.mod h1:6znkekS3T2vp0waiMhen4GPU1BiAsrP+iXHcE7a7rFo= github.com/minio/asm2plan9s v0.0.0-20200509001527-cdd76441f9d8 h1:AMFGa4R4MiIpspGNG7Z948v4n35fFGB3RR3G/ry4FWs= github.com/minio/asm2plan9s v0.0.0-20200509001527-cdd76441f9d8/go.mod h1:mC1jAcsrzbxHt8iiaC+zU4b1ylILSosueou12R++wfY= github.com/minio/c2goasm v0.0.0-20190812172519-36a3d3bbc4f3 h1:+n/aFZefKZp7spd8DFdX7uMikMLXX4oubIzJF4kv/wI= @@ -742,8 +726,6 @@ github.com/modern-go/reflect2 
v0.0.0-20180701023420-4b7aa43c6742/go.mod h1:bx2lN github.com/modern-go/reflect2 v1.0.1/go.mod h1:bx2lNnkwVCuqBIxFjflWJWanXIb3RllmbCylyMrvgv0= github.com/modern-go/reflect2 v1.0.2 h1:xBagoLtFs94CBntxluKeaWgTMpvLxC4ur3nMaC9Gz0M= github.com/modern-go/reflect2 v1.0.2/go.mod h1:yWuevngMOJpCy52FWWMvUC8ws7m/LJsjYzDa0/r8luk= -github.com/modocache/gover v0.0.0-20171022184752-b58185e213c5/go.mod h1:caMODM3PzxT8aQXRPkAt8xlV/e7d7w8GM5g0fa5F0D8= -github.com/montanaflynn/stats v0.6.6/go.mod h1:etXPPgVO6n31NxCd9KQUMvCM+ve0ruNzt6R8Bnaayow= github.com/montanaflynn/stats v0.7.1 h1:etflOAAHORrCC44V+aR6Ftzort912ZU+YLiSTuV8eaE= github.com/montanaflynn/stats v0.7.1/go.mod h1:etXPPgVO6n31NxCd9KQUMvCM+ve0ruNzt6R8Bnaayow= github.com/mtibben/percent v0.2.1 h1:5gssi8Nqo8QU/r2pynCm+hBQHpkB/uNK7BJCFogWdzs= @@ -794,8 +776,6 @@ github.com/pingcap/log v1.1.1-0.20230317032135-a0d097d16e22 h1:2SOzvGvE8beiC1Y4g github.com/pingcap/log v1.1.1-0.20230317032135-a0d097d16e22/go.mod h1:DWQW5jICDR7UJh4HtxXSM20Churx4CQL0fwL/SoOSA4= github.com/pingcap/tidb/pkg/parser v0.0.0-20240626120124-432bb79f9732 h1:kW8maR7ldPzfK8ThUR8tg/xFjpxCJmjL0P9FMbxITpQ= github.com/pingcap/tidb/pkg/parser v0.0.0-20240626120124-432bb79f9732/go.mod h1:c/4la2yfv1vBYvtIG8WCDyDinLMDIUC5+zLRHiafY+Y= -github.com/pkg/browser v0.0.0-20210115035449-ce105d075bb4/go.mod h1:N6UoU20jOqggOuDwUaBQpluzLNDqif3kq9z2wpdYEfQ= -github.com/pkg/browser v0.0.0-20210911075715-681adbf594b8/go.mod h1:HKlIX3XHQyzLZPlr7++PzdhaXEj94dEiJgZDTsxEqUI= github.com/pkg/browser v0.0.0-20240102092130-5ac0b6a4141c h1:+mdjkGKdHQG3305AYmdv1U2eRNDiU2ErMBj1gwrq8eQ= github.com/pkg/browser v0.0.0-20240102092130-5ac0b6a4141c/go.mod h1:7rwL4CYBLnjLxUqIJNnCWiEdr3bn6IUYi15bNlnbCCU= github.com/pkg/errors v0.8.0/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= @@ -1027,11 +1007,8 @@ golang.org/x/crypto v0.0.0-20190605123033-f99c8df09eb5/go.mod h1:yigFU9vqHzYiE8U golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto= golang.org/x/crypto v0.0.0-20201002170205-7f63de1d35b0/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto= -golang.org/x/crypto v0.0.0-20201112155050-0c6587e931a9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto= golang.org/x/crypto v0.0.0-20210513164829-c07d793c2f9a/go.mod h1:P+XmwS30IXTQdn5tA2iutPOUgjI07+tq3H3K9MVA1s8= golang.org/x/crypto v0.0.0-20210921155107-089bfa567519/go.mod h1:GvvjBRRGRdwPK5ydBHafDWAxML/pGHZbMvKqRZ5+Abc= -golang.org/x/crypto v0.0.0-20220511200225-c6db032c6c88/go.mod h1:IxCIyHEi3zRg3s0A5j5BB6A9Jmi73HwBIUl50j+osU4= -golang.org/x/crypto v0.0.0-20220622213112-05595931fe9d/go.mod h1:IxCIyHEi3zRg3s0A5j5BB6A9Jmi73HwBIUl50j+osU4= golang.org/x/crypto v0.1.0/go.mod h1:RecgLatLF4+eUMCP1PoPZQb+cVrJcOPbHkTkbkB9sbw= golang.org/x/crypto v0.24.0 h1:mnl8DM0o513X8fdIkmyFE/5hTYxbwYOjDS/+rK6qpRI= golang.org/x/crypto v0.24.0/go.mod h1:Z1PMYSOR5nyMcyAVAIQSKCDwalqy85Aqn1x3Ws4L5DM= @@ -1117,7 +1094,6 @@ golang.org/x/net v0.0.0-20200520182314-0ba52f642ac2/go.mod h1:qpuaurCH72eLCgpAm/ golang.org/x/net v0.0.0-20200625001655-4c5254603344/go.mod h1:/O7V0waA8r7cgGh81Ro3o1hOxt32SMVPicZroKQ2sZA= golang.org/x/net v0.0.0-20200707034311-ab3426394381/go.mod h1:/O7V0waA8r7cgGh81Ro3o1hOxt32SMVPicZroKQ2sZA= golang.org/x/net v0.0.0-20200822124328-c89045814202/go.mod h1:/O7V0waA8r7cgGh81Ro3o1hOxt32SMVPicZroKQ2sZA= -golang.org/x/net v0.0.0-20201010224723-4f7140c49acb/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU= 
golang.org/x/net v0.0.0-20201021035429-f5854403a974/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU= golang.org/x/net v0.0.0-20201031054903-ff519b6c9102/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU= golang.org/x/net v0.0.0-20201110031124-69a78807bb2b/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU= @@ -1129,8 +1105,6 @@ golang.org/x/net v0.0.0-20210405180319-a5a99cb37ef4/go.mod h1:p54w0d4576C0XHj96b golang.org/x/net v0.0.0-20210503060351-7fd8e65b6420/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y= golang.org/x/net v0.0.0-20210614182718-04defd469f4e/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y= golang.org/x/net v0.0.0-20211015210444-4f30a5c0130f/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y= -golang.org/x/net v0.0.0-20211112202133-69e39bad7dc2/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y= -golang.org/x/net v0.0.0-20220425223048-2871e0cb64e4/go.mod h1:CfG3xpIq0wQ8r1q4Su4UZFWDARRcnwPjda9FqA0JpMk= golang.org/x/net v0.0.0-20220722155237-a158d28d115b/go.mod h1:XRhObCWvk6IyKnWLug+ECip1KBveYUHfp+8e9klMJ9c= golang.org/x/net v0.1.0/go.mod h1:Cx3nUiGt4eDBEyega/BKRp+/AlGL8hYe7U9odMt2Cco= golang.org/x/net v0.26.0 h1:soB7SVo0PWrY4vPW/+ay0jKDNScG2X9wFeYlXIvJsOQ= @@ -1223,7 +1197,6 @@ golang.org/x/sys v0.0.0-20210510120138-977fb7262007/go.mod h1:oPkhp1MJrh7nUepCBc golang.org/x/sys v0.0.0-20210514084401-e8d321eab015/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20210603125802-9665404d3644/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20210615035016-665e8c7367d1/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/sys v0.0.0-20210616045830-e2b7044e8c71/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20210616094352-59db8d763f22/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20210630005230-0f9fa26af87c/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20210806184541-e5e7981a1069/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= @@ -1233,8 +1206,6 @@ golang.org/x/sys v0.0.0-20210927094055-39ccf1dd6fa6/go.mod h1:oPkhp1MJrh7nUepCBc golang.org/x/sys v0.0.0-20211019181941-9d821ace8654/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20211025201205-69cdffdb9359/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20211124211545-fe61309f8881/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/sys v0.0.0-20211216021012-1d35b9e2eb4e/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/sys v0.0.0-20220224120231-95c6836cb0e7/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20220412211240-33da011f77ad/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20220520151302-bc2c85ada10a/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20220627191245-f75cf1eec38b/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= @@ -1528,8 +1499,6 @@ gopkg.in/mgo.v2 v2.0.0-20190816093944-a6b53ec6cb22/go.mod h1:yeKp02qBN3iKW1OzL3M gopkg.in/natefinch/lumberjack.v2 v2.0.0/go.mod h1:l0ndWWf7gzL7RNwBG7wST/UCcT4T24xpD6X8LsfU/+k= gopkg.in/natefinch/lumberjack.v2 v2.2.1 h1:bBRl1b0OH9s/DuPhuXpNl+VtCaJXFZ5/uEFST95x9zc= gopkg.in/natefinch/lumberjack.v2 v2.2.1/go.mod h1:YD8tP3GAjkrDg1eZH7EGmyESg/lsYskCTPBJVb9jqSc= -gopkg.in/natefinch/npipe.v2 v2.0.0-20160621034901-c1b8fa8bdcce h1:+JknDZhAj8YMt7GC73Ei8pv4MzjDUNPHgQWJdtMAaDU= 
-gopkg.in/natefinch/npipe.v2 v2.0.0-20160621034901-c1b8fa8bdcce/go.mod h1:5AcXVHNjg+BDxry382+8OKon8SEWiKktQR07RKPsv1c= gopkg.in/tomb.v1 v1.0.0-20141024135613-dd632973f1e7/go.mod h1:dt/ZhP58zS4L8KSrWDmTeBkI65Dw0HsyUHuEVlX15mw= gopkg.in/yaml.v2 v2.2.1/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= gopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= diff --git a/materialize-azure-fabric-warehouse/.snapshots/TestSQLGeneration b/materialize-azure-fabric-warehouse/.snapshots/TestSQLGeneration new file mode 100644 index 0000000000..64d681ee54 --- /dev/null +++ b/materialize-azure-fabric-warehouse/.snapshots/TestSQLGeneration @@ -0,0 +1,224 @@ +--- Begin "a-warehouse"."a-schema".key_value createTargetTable --- + +CREATE TABLE "a-warehouse"."a-schema".key_value ( + key1 BIGINT, + key2 BIT, + "key!binary" VARBINARY(MAX), + "array" VARCHAR(MAX), + "binary" VARBINARY(MAX), + "boolean" BIT, + flow_published_at DATETIME2(6), + "integer" BIGINT, + "integerGt64Bit" DECIMAL(38,0), + "integerWithUserDDL" DECIMAL(20), + multiple VARCHAR(MAX), + number FLOAT, + "numberCastToString" VARCHAR(MAX), + "object" VARCHAR(MAX), + string VARCHAR(MAX), + "stringInteger" DECIMAL(38,0), + "stringInteger39Chars" VARCHAR(MAX), + "stringInteger66Chars" VARCHAR(MAX), + "stringNumber" FLOAT, + flow_document VARCHAR(MAX) +); +--- End "a-warehouse"."a-schema".key_value createTargetTable --- + +--- Begin "a-warehouse"."a-schema".delta_updates createTargetTable --- + +CREATE TABLE "a-warehouse"."a-schema".delta_updates ( + "theKey" VARCHAR(MAX), + "aValue" BIGINT, + flow_published_at DATETIME2(6) +); +--- End "a-warehouse"."a-schema".delta_updates createTargetTable --- + +--- Begin alter table add columns --- + +ALTER TABLE "a-warehouse"."a-schema".key_value ADD + first_new_column STRING, + second_new_column BOOL; +--- End alter table add columns --- + +--- Begin Fence Update --- +UPDATE "path"."to".checkpoints + SET "checkpoint" = 'AAECAwQFBgcICQ==' + WHERE materialization = 'some/Materialization' + AND key_begin = 1122867 + AND key_end = 4293844428 + AND fence = 123; +--- End Fence Update --- + +--- Begin "a-warehouse"."a-schema".key_value storeCopyIntoFromStagedQuery --- +CREATE TABLE flow_temp_table_store_0 ( + key1 BIGINT, + key2 BIT, + "key!binary" VARCHAR(MAX), + "array" VARCHAR(MAX), + "binary" VARCHAR(MAX), + "boolean" BIT, + flow_published_at DATETIME2(6), + "integer" BIGINT, + "integerGt64Bit" DECIMAL(38,0), + "integerWithUserDDL" DECIMAL(20), + multiple VARCHAR(MAX), + number FLOAT, + "numberCastToString" VARCHAR(MAX), + "object" VARCHAR(MAX), + string VARCHAR(MAX), + "stringInteger" DECIMAL(38,0), + "stringInteger39Chars" VARCHAR(MAX), + "stringInteger66Chars" VARCHAR(MAX), + "stringNumber" FLOAT, + flow_document VARCHAR(MAX) +); + +COPY INTO flow_temp_table_store_0 +(key1, key2, "key!binary", "array", "binary", "boolean", flow_published_at, "integer", "integerGt64Bit", "integerWithUserDDL", multiple, number, "numberCastToString", "object", string, "stringInteger", "stringInteger39Chars", "stringInteger66Chars", "stringNumber", flow_document) +FROM 'https://some/file1', 'https://some/file2' +WITH ( + FILE_TYPE = 'CSV', + COMPRESSION = 'Gzip', + CREDENTIAL = (IDENTITY='Storage Account Key', SECRET='some-storage-account-key') +); + +INSERT INTO "a-warehouse"."a-schema".key_value (key1, key2, "key!binary", "array", "binary", "boolean", flow_published_at, "integer", "integerGt64Bit", "integerWithUserDDL", multiple, number, "numberCastToString", "object", string, "stringInteger", 
"stringInteger39Chars", "stringInteger66Chars", "stringNumber", flow_document) +SELECT key1, key2, BASE64_DECODE("key!binary"), "array", BASE64_DECODE("binary"), "boolean", flow_published_at, "integer", "integerGt64Bit", "integerWithUserDDL", multiple, number, "numberCastToString", "object", string, "stringInteger", "stringInteger39Chars", "stringInteger66Chars", "stringNumber", flow_document +FROM flow_temp_table_store_0; + +DROP TABLE flow_temp_table_store_0; +--- End "a-warehouse"."a-schema".key_value storeCopyIntoFromStagedQuery --- + +--- Begin "a-warehouse"."a-schema".key_value storeCopyIntoDirectQuery --- +COPY INTO "a-warehouse"."a-schema".key_value +(key1, key2, "key!binary", "array", "binary", "boolean", flow_published_at, "integer", "integerGt64Bit", "integerWithUserDDL", multiple, number, "numberCastToString", "object", string, "stringInteger", "stringInteger39Chars", "stringInteger66Chars", "stringNumber", flow_document) +FROM 'https://some/file1', 'https://some/file2' +WITH ( + FILE_TYPE = 'CSV', + COMPRESSION = 'Gzip', + CREDENTIAL = (IDENTITY='Storage Account Key', SECRET='some-storage-account-key') +); +--- End "a-warehouse"."a-schema".key_value storeCopyIntoDirectQuery --- + +--- Begin "a-warehouse"."a-schema".delta_updates storeCopyIntoFromStagedQuery --- +CREATE TABLE flow_temp_table_store_1 ( + "theKey" VARCHAR(MAX), + "aValue" BIGINT, + flow_published_at DATETIME2(6) +); + +COPY INTO flow_temp_table_store_1 +("theKey", "aValue", flow_published_at) +FROM 'https://some/file1', 'https://some/file2' +WITH ( + FILE_TYPE = 'CSV', + COMPRESSION = 'Gzip', + CREDENTIAL = (IDENTITY='Storage Account Key', SECRET='some-storage-account-key') +); + +INSERT INTO "a-warehouse"."a-schema".delta_updates ("theKey", "aValue", flow_published_at) +SELECT "theKey", "aValue", flow_published_at +FROM flow_temp_table_store_1; + +DROP TABLE flow_temp_table_store_1; +--- End "a-warehouse"."a-schema".delta_updates storeCopyIntoFromStagedQuery --- + +--- Begin "a-warehouse"."a-schema".delta_updates storeCopyIntoDirectQuery --- +COPY INTO "a-warehouse"."a-schema".delta_updates +("theKey", "aValue", flow_published_at) +FROM 'https://some/file1', 'https://some/file2' +WITH ( + FILE_TYPE = 'CSV', + COMPRESSION = 'Gzip', + CREDENTIAL = (IDENTITY='Storage Account Key', SECRET='some-storage-account-key') +); +--- End "a-warehouse"."a-schema".delta_updates storeCopyIntoDirectQuery --- + +--- Begin "a-warehouse"."a-schema".key_value createLoadTable --- +CREATE TABLE flow_temp_table_load_0 ( + key1 BIGINT, + key2 BIT, + "key!binary" VARCHAR(MAX) +); + +COPY INTO flow_temp_table_load_0 +(key1, key2, "key!binary") +FROM 'https://some/file1', 'https://some/file2' +WITH ( + FILE_TYPE = 'CSV', + COMPRESSION = 'Gzip', + CREDENTIAL = (IDENTITY='Storage Account Key', SECRET='some-storage-account-key') +); +--- End "a-warehouse"."a-schema".key_value createLoadTable --- + +--- Begin "a-warehouse"."a-schema".key_value loadQuery --- +SELECT 0, r.flow_document +FROM flow_temp_table_load_0 AS l +JOIN "a-warehouse"."a-schema".key_value AS r + ON l.key1 = r.key1 AND r.key1 >= 10 AND r.key1 <= 100 + AND l.key2 = r.key2 + AND BASE64_DECODE(l."key!binary") = r."key!binary" +--- End "a-warehouse"."a-schema".key_value loadQuery --- + +--- Begin "a-warehouse"."a-schema".key_value dropLoadTable --- +DROP TABLE flow_temp_table_load_0;--- End "a-warehouse"."a-schema".key_value dropLoadTable --- + +--- Begin "a-warehouse"."a-schema".key_value storeMergeQuery --- +CREATE TABLE flow_temp_table_store_0 ( + key1 BIGINT, + key2 BIT, + 
"key!binary" VARCHAR(MAX), + "array" VARCHAR(MAX), + "binary" VARCHAR(MAX), + "boolean" BIT, + flow_published_at DATETIME2(6), + "integer" BIGINT, + "integerGt64Bit" DECIMAL(38,0), + "integerWithUserDDL" DECIMAL(20), + multiple VARCHAR(MAX), + number FLOAT, + "numberCastToString" VARCHAR(MAX), + "object" VARCHAR(MAX), + string VARCHAR(MAX), + "stringInteger" DECIMAL(38,0), + "stringInteger39Chars" VARCHAR(MAX), + "stringInteger66Chars" VARCHAR(MAX), + "stringNumber" FLOAT, + flow_document VARCHAR(MAX) +); + +COPY INTO flow_temp_table_store_0 +(key1, key2, "key!binary", "array", "binary", "boolean", flow_published_at, "integer", "integerGt64Bit", "integerWithUserDDL", multiple, number, "numberCastToString", "object", string, "stringInteger", "stringInteger39Chars", "stringInteger66Chars", "stringNumber", flow_document) +FROM 'https://some/file1', 'https://some/file2' +WITH ( + FILE_TYPE = 'CSV', + COMPRESSION = 'Gzip', + CREDENTIAL = (IDENTITY='Storage Account Key', SECRET='some-storage-account-key') +); + +DELETE r +FROM "a-warehouse"."a-schema".key_value AS r +INNER JOIN flow_temp_table_store_0 AS l + ON l.key1 = r.key1 AND r.key1 >= 10 AND r.key1 <= 100 + AND l.key2 = r.key2 + AND BASE64_DECODE(l."key!binary") = r."key!binary"; + +INSERT INTO "a-warehouse"."a-schema".key_value (key1, key2, "key!binary", "array", "binary", "boolean", flow_published_at, "integer", "integerGt64Bit", "integerWithUserDDL", multiple, number, "numberCastToString", "object", string, "stringInteger", "stringInteger39Chars", "stringInteger66Chars", "stringNumber", flow_document) +SELECT key1, key2, BASE64_DECODE("key!binary"), "array", BASE64_DECODE("binary"), "boolean", flow_published_at, "integer", "integerGt64Bit", "integerWithUserDDL", multiple, number, "numberCastToString", "object", string, "stringInteger", "stringInteger39Chars", "stringInteger66Chars", "stringNumber", flow_document +FROM flow_temp_table_store_0 +WHERE flow_document <> '"delete"'; + +DROP TABLE flow_temp_table_store_0; +--- End "a-warehouse"."a-schema".key_value storeMergeQuery --- + +--- Begin createMigrationTable +CREATE TABLE some_table_tmp AS SELECT + not_migrated_column, + CAST(is_migrated_column AS VARCHAR(MAX)) AS is_migrated_column, + another_not_migrated_column, + CAST(CASE WHEN migrated_boolean_column = 1 THEN 'true' WHEN migrated_boolean_column = 0 THEN 'false' ELSE NULL END AS VARCHAR(MAX)) AS migrated_boolean_column, + yet_another_not_migrated_column + FROM some_table; +--- End createMigrationTable --- + + diff --git a/materialize-azure-fabric-warehouse/.snapshots/TestSpecification b/materialize-azure-fabric-warehouse/.snapshots/TestSpecification new file mode 100644 index 0000000000..c30e6c9160 --- /dev/null +++ b/materialize-azure-fabric-warehouse/.snapshots/TestSpecification @@ -0,0 +1,220 @@ +{ + "config_schema_json": { + "$schema": "http://json-schema.org/draft/2020-12/schema", + "$id": "https://github.com/estuary/connectors/materialize-azure-fabric-warehouse/config", + "properties": { + "clientID": { + "type": "string", + "title": "Client ID", + "description": "Client ID for the service principal used to connect to the Azure Fabric Warehouse.", + "order": 0 + }, + "clientSecret": { + "type": "string", + "title": "Client Secret", + "description": "Client Secret for the service principal used to connect to the Azure Fabric Warehouse.", + "order": 1, + "secret": true + }, + "warehouse": { + "type": "string", + "title": "Warehouse", + "description": "Name of the Azure Fabric Warehouse to connect to.", + "order": 2 + }, + 
"schema": { + "type": "string", + "title": "Schema", + "description": "Schema for bound collection tables (unless overridden within the binding resource configuration) as well as associated materialization metadata tables.", + "order": 3 + }, + "connectionString": { + "type": "string", + "title": "Connection String", + "description": "SQL connection string for the Azure Fabric Warehouse.", + "order": 4 + }, + "storageAccountName": { + "type": "string", + "title": "Storage Account Name", + "description": "Name of the storage account that temporary files will be written to.", + "order": 5 + }, + "storageAccountKey": { + "type": "string", + "title": "Storage Account Key", + "description": "Storage account key for the storage account that temporary files will be written to.", + "order": 6, + "secret": true + }, + "containerName": { + "type": "string", + "title": "Storage Account Container Name", + "description": "Name of the container in the storage account where temporary files will be written.", + "order": 7 + }, + "directory": { + "type": "string", + "title": "Directory", + "description": "Optional prefix that will be used for temporary files.", + "order": 8 + }, + "hardDelete": { + "type": "boolean", + "title": "Hard Delete", + "description": "If this option is enabled items deleted in the source will also be deleted from the destination. By default is disabled and _meta/op in the destination will signify whether rows have been deleted (soft-delete).", + "default": false, + "order": 9 + }, + "syncSchedule": { + "properties": { + "syncFrequency": { + "type": "string", + "enum": [ + "0s", + "30s", + "5m", + "15m", + "30m", + "1h", + "2h", + "4h" + ], + "title": "Sync Frequency", + "description": "Frequency at which transactions are executed when the materialization is fully caught up and streaming changes. May be enabled only for certain time periods and days of the week if configured below; otherwise it is effective 24/7. Defaults to 30 minutes if unset.", + "order": 0 + }, + "timezone": { + "type": "string", + "title": "Timezone", + "description": "Timezone applicable to sync time windows and active days. Must be a valid IANA time zone name or +HH:MM offset.", + "order": 1 + }, + "fastSyncStartTime": { + "type": "string", + "title": "Fast Sync Start Time", + "description": "Time of day that transactions begin executing at the configured Sync Frequency. Prior to this time transactions will be executed more slowly. Must be in the form of '09:00'.", + "order": 2, + "pattern": "^(0?[0-9]|1[0-9]|2[0-3]):[0-5][0-9]$" + }, + "fastSyncStopTime": { + "type": "string", + "title": "Fast Sync Stop Time", + "description": "Time of day that transactions stop executing at the configured Sync Frequency. After this time transactions will be executed more slowly. Must be in the form of '17:00'.", + "order": 3, + "pattern": "^(0?[0-9]|1[0-9]|2[0-3]):[0-5][0-9]$" + }, + "fastSyncEnabledDays": { + "type": "string", + "title": "Fast Sync Enabled Days", + "description": "Days of the week that the configured Sync Frequency is active. On days that are not enabled, transactions will be executed more slowly for the entire day. Examples: 'M-F' (Monday through Friday, inclusive), 'M,W,F' (Monday, Wednesday, and Friday), 'Su-T,Th-S' (Sunday through Tuesday, inclusive; Thursday through Saturday, inclusive). All days are enabled if unset.", + "order": 4 + } + }, + "additionalProperties": false, + "type": "object", + "title": "Sync Schedule", + "description": "Configure schedule of transactions for the materialization." 
+ }, + "dbt_job_trigger": { + "properties": { + "job_id": { + "type": "string", + "title": "Job ID", + "description": "dbt job ID" + }, + "account_id": { + "type": "string", + "title": "Account ID", + "description": "dbt account ID" + }, + "access_url": { + "type": "string", + "title": "Access URL", + "description": "dbt access URL can be found in your Account Settings. See go.estuary.dev/dbt-cloud-trigger", + "pattern": "^https://.+$" + }, + "api_key": { + "type": "string", + "title": "API Key", + "description": "dbt API Key", + "secret": true + }, + "cause": { + "type": "string", + "title": "Cause Message", + "description": "You can set a custom 'cause' message for the job trigger. Defaults to 'Estuary Flow'." + }, + "mode": { + "type": "string", + "enum": [ + "skip", + "replace", + "ignore" + ], + "title": "Job Trigger Mode", + "description": "Specifies how should already-running jobs be treated. Defaults to 'skip' which skips the trigger if a job is already running; 'replace' cancels the running job and runs a new one; while 'ignore' triggers a new job regardless of existing jobs.", + "default": "skip" + }, + "interval": { + "type": "string", + "title": "Minimum Run Interval", + "description": "Minimum time between dbt job triggers. This interval is only triggered if data has been materialized by your task.", + "default": "30m" + } + }, + "additionalProperties": false, + "type": "object", + "required": [ + "job_id", + "account_id", + "api_key" + ], + "title": "dbt Cloud Job Trigger", + "description": "Trigger a dbt job when new data is available" + } + }, + "type": "object", + "required": [ + "clientID", + "clientSecret", + "warehouse", + "schema", + "connectionString", + "storageAccountName", + "storageAccountKey", + "containerName" + ], + "title": "SQL Connection" + }, + "resource_config_schema_json": { + "$schema": "http://json-schema.org/draft/2020-12/schema", + "$id": "https://github.com/estuary/connectors/materialize-azure-fabric-warehouse/table-config", + "properties": { + "table": { + "type": "string", + "title": "Table", + "description": "Name of the database table.", + "x-collection-name": true + }, + "schema": { + "type": "string", + "title": "Alternative Schema", + "description": "Alternative schema for this table (optional).", + "x-schema-name": true + }, + "delta_updates": { + "type": "boolean", + "title": "Delta Update", + "description": "Should updates to this table be done via delta updates.", + "x-delta-updates": true + } + }, + "type": "object", + "required": [ + "table" + ], + "title": "SQL Table" + }, + "documentation_url": "https://go.estuary.dev/materialize-azure-fabric-warehouse" +} diff --git a/materialize-azure-fabric-warehouse/.snapshots/TestValidateAndApply b/materialize-azure-fabric-warehouse/.snapshots/TestValidateAndApply new file mode 100644 index 0000000000..ad5fe7b106 --- /dev/null +++ b/materialize-azure-fabric-warehouse/.snapshots/TestValidateAndApply @@ -0,0 +1,350 @@ +Big Schema Initial Constraints: +{"Field":"_meta/flow_truncated","Type":4,"TypeString":"FIELD_OPTIONAL","Reason":"Metadata fields are able to be materialized"} +{"Field":"arrayField","Type":3,"TypeString":"LOCATION_RECOMMENDED","Reason":"This field is able to be materialized"} +{"Field":"boolField","Type":3,"TypeString":"LOCATION_RECOMMENDED","Reason":"The projection has a single scalar type"} +{"Field":"flow_document","Type":3,"TypeString":"LOCATION_RECOMMENDED","Reason":"The root document should usually be materialized"} 
+{"Field":"flow_published_at","Type":3,"TypeString":"LOCATION_RECOMMENDED","Reason":"The projection has a single scalar type"} +{"Field":"intField","Type":3,"TypeString":"LOCATION_RECOMMENDED","Reason":"The projection has a single scalar type"} +{"Field":"key","Type":2,"TypeString":"LOCATION_REQUIRED","Reason":"All Locations that are part of the collections key are required"} +{"Field":"multipleField","Type":3,"TypeString":"LOCATION_RECOMMENDED","Reason":"This field is able to be materialized"} +{"Field":"nullField","Type":5,"TypeString":"FIELD_FORBIDDEN","Reason":"Cannot materialize a field where the only possible type is 'null'"} +{"Field":"numField","Type":3,"TypeString":"LOCATION_RECOMMENDED","Reason":"The projection has a single scalar type"} +{"Field":"objField","Type":4,"TypeString":"FIELD_OPTIONAL","Reason":"Object fields may be materialized"} +{"Field":"stringDateField","Type":3,"TypeString":"LOCATION_RECOMMENDED","Reason":"The projection has a single scalar type"} +{"Field":"stringDateTimeField","Type":3,"TypeString":"LOCATION_RECOMMENDED","Reason":"The projection has a single scalar type"} +{"Field":"stringDurationField","Type":3,"TypeString":"LOCATION_RECOMMENDED","Reason":"The projection has a single scalar type"} +{"Field":"stringEmailField","Type":3,"TypeString":"LOCATION_RECOMMENDED","Reason":"The projection has a single scalar type"} +{"Field":"stringField","Type":3,"TypeString":"LOCATION_RECOMMENDED","Reason":"The projection has a single scalar type"} +{"Field":"stringHostnameField","Type":3,"TypeString":"LOCATION_RECOMMENDED","Reason":"The projection has a single scalar type"} +{"Field":"stringIdnEmailField","Type":3,"TypeString":"LOCATION_RECOMMENDED","Reason":"The projection has a single scalar type"} +{"Field":"stringIdnHostnameField","Type":3,"TypeString":"LOCATION_RECOMMENDED","Reason":"The projection has a single scalar type"} +{"Field":"stringIntegerField","Type":3,"TypeString":"LOCATION_RECOMMENDED","Reason":"The projection has a single scalar type"} +{"Field":"stringIpv4Field","Type":3,"TypeString":"LOCATION_RECOMMENDED","Reason":"The projection has a single scalar type"} +{"Field":"stringIpv6Field","Type":3,"TypeString":"LOCATION_RECOMMENDED","Reason":"The projection has a single scalar type"} +{"Field":"stringIriField","Type":3,"TypeString":"LOCATION_RECOMMENDED","Reason":"The projection has a single scalar type"} +{"Field":"stringIriReferenceField","Type":3,"TypeString":"LOCATION_RECOMMENDED","Reason":"The projection has a single scalar type"} +{"Field":"stringJsonPointerField","Type":3,"TypeString":"LOCATION_RECOMMENDED","Reason":"The projection has a single scalar type"} +{"Field":"stringMacAddr8Field","Type":3,"TypeString":"LOCATION_RECOMMENDED","Reason":"The projection has a single scalar type"} +{"Field":"stringMacAddrField","Type":3,"TypeString":"LOCATION_RECOMMENDED","Reason":"The projection has a single scalar type"} +{"Field":"stringNumberField","Type":3,"TypeString":"LOCATION_RECOMMENDED","Reason":"The projection has a single scalar type"} +{"Field":"stringRegexField","Type":3,"TypeString":"LOCATION_RECOMMENDED","Reason":"The projection has a single scalar type"} +{"Field":"stringRelativeJsonPointerField","Type":3,"TypeString":"LOCATION_RECOMMENDED","Reason":"The projection has a single scalar type"} +{"Field":"stringTimeField","Type":3,"TypeString":"LOCATION_RECOMMENDED","Reason":"The projection has a single scalar type"} +{"Field":"stringUint32Field","Type":3,"TypeString":"LOCATION_RECOMMENDED","Reason":"The projection has a single scalar type"} 
+{"Field":"stringUint64Field","Type":3,"TypeString":"LOCATION_RECOMMENDED","Reason":"The projection has a single scalar type"} +{"Field":"stringUriField","Type":3,"TypeString":"LOCATION_RECOMMENDED","Reason":"The projection has a single scalar type"} +{"Field":"stringUriReferenceField","Type":3,"TypeString":"LOCATION_RECOMMENDED","Reason":"The projection has a single scalar type"} +{"Field":"stringUriTemplateField","Type":3,"TypeString":"LOCATION_RECOMMENDED","Reason":"The projection has a single scalar type"} +{"Field":"stringUuidField","Type":3,"TypeString":"LOCATION_RECOMMENDED","Reason":"The projection has a single scalar type"} + +Big Schema Re-validated Constraints: +{"Field":"_meta/flow_truncated","Type":3,"TypeString":"LOCATION_RECOMMENDED","Reason":"This location is part of the current materialization"} +{"Field":"arrayField","Type":3,"TypeString":"LOCATION_RECOMMENDED","Reason":"This location is part of the current materialization"} +{"Field":"boolField","Type":3,"TypeString":"LOCATION_RECOMMENDED","Reason":"This location is part of the current materialization"} +{"Field":"flow_document","Type":3,"TypeString":"LOCATION_RECOMMENDED","Reason":"This location is part of the current materialization"} +{"Field":"flow_published_at","Type":3,"TypeString":"LOCATION_RECOMMENDED","Reason":"This location is part of the current materialization"} +{"Field":"intField","Type":3,"TypeString":"LOCATION_RECOMMENDED","Reason":"This location is part of the current materialization"} +{"Field":"key","Type":1,"TypeString":"FIELD_REQUIRED","Reason":"This field is a key in the current materialization"} +{"Field":"multipleField","Type":3,"TypeString":"LOCATION_RECOMMENDED","Reason":"This location is part of the current materialization"} +{"Field":"nullField","Type":5,"TypeString":"FIELD_FORBIDDEN","Reason":"Cannot materialize a field where the only possible type is 'null'"} +{"Field":"numField","Type":3,"TypeString":"LOCATION_RECOMMENDED","Reason":"This location is part of the current materialization"} +{"Field":"objField","Type":3,"TypeString":"LOCATION_RECOMMENDED","Reason":"This location is part of the current materialization"} +{"Field":"stringDateField","Type":3,"TypeString":"LOCATION_RECOMMENDED","Reason":"This location is part of the current materialization"} +{"Field":"stringDateTimeField","Type":3,"TypeString":"LOCATION_RECOMMENDED","Reason":"This location is part of the current materialization"} +{"Field":"stringDurationField","Type":3,"TypeString":"LOCATION_RECOMMENDED","Reason":"This location is part of the current materialization"} +{"Field":"stringEmailField","Type":3,"TypeString":"LOCATION_RECOMMENDED","Reason":"This location is part of the current materialization"} +{"Field":"stringField","Type":3,"TypeString":"LOCATION_RECOMMENDED","Reason":"This location is part of the current materialization"} +{"Field":"stringHostnameField","Type":3,"TypeString":"LOCATION_RECOMMENDED","Reason":"This location is part of the current materialization"} +{"Field":"stringIdnEmailField","Type":3,"TypeString":"LOCATION_RECOMMENDED","Reason":"This location is part of the current materialization"} +{"Field":"stringIdnHostnameField","Type":3,"TypeString":"LOCATION_RECOMMENDED","Reason":"This location is part of the current materialization"} +{"Field":"stringIntegerField","Type":3,"TypeString":"LOCATION_RECOMMENDED","Reason":"This location is part of the current materialization"} +{"Field":"stringIpv4Field","Type":3,"TypeString":"LOCATION_RECOMMENDED","Reason":"This location is part of the current materialization"} 
+{"Field":"stringIpv6Field","Type":3,"TypeString":"LOCATION_RECOMMENDED","Reason":"This location is part of the current materialization"} +{"Field":"stringIriField","Type":3,"TypeString":"LOCATION_RECOMMENDED","Reason":"This location is part of the current materialization"} +{"Field":"stringIriReferenceField","Type":3,"TypeString":"LOCATION_RECOMMENDED","Reason":"This location is part of the current materialization"} +{"Field":"stringJsonPointerField","Type":3,"TypeString":"LOCATION_RECOMMENDED","Reason":"This location is part of the current materialization"} +{"Field":"stringMacAddr8Field","Type":3,"TypeString":"LOCATION_RECOMMENDED","Reason":"This location is part of the current materialization"} +{"Field":"stringMacAddrField","Type":3,"TypeString":"LOCATION_RECOMMENDED","Reason":"This location is part of the current materialization"} +{"Field":"stringNumberField","Type":3,"TypeString":"LOCATION_RECOMMENDED","Reason":"This location is part of the current materialization"} +{"Field":"stringRegexField","Type":3,"TypeString":"LOCATION_RECOMMENDED","Reason":"This location is part of the current materialization"} +{"Field":"stringRelativeJsonPointerField","Type":3,"TypeString":"LOCATION_RECOMMENDED","Reason":"This location is part of the current materialization"} +{"Field":"stringTimeField","Type":3,"TypeString":"LOCATION_RECOMMENDED","Reason":"This location is part of the current materialization"} +{"Field":"stringUint32Field","Type":3,"TypeString":"LOCATION_RECOMMENDED","Reason":"This location is part of the current materialization"} +{"Field":"stringUint64Field","Type":3,"TypeString":"LOCATION_RECOMMENDED","Reason":"This location is part of the current materialization"} +{"Field":"stringUriField","Type":3,"TypeString":"LOCATION_RECOMMENDED","Reason":"This location is part of the current materialization"} +{"Field":"stringUriReferenceField","Type":3,"TypeString":"LOCATION_RECOMMENDED","Reason":"This location is part of the current materialization"} +{"Field":"stringUriTemplateField","Type":3,"TypeString":"LOCATION_RECOMMENDED","Reason":"This location is part of the current materialization"} +{"Field":"stringUuidField","Type":3,"TypeString":"LOCATION_RECOMMENDED","Reason":"This location is part of the current materialization"} + +Big Schema Changed Types Constraints: +{"Field":"_meta/flow_truncated","Type":3,"TypeString":"LOCATION_RECOMMENDED","Reason":"This location is part of the current materialization"} +{"Field":"arrayField","Type":3,"TypeString":"LOCATION_RECOMMENDED","Reason":"This location is part of the current materialization"} +{"Field":"boolField","Type":6,"TypeString":"UNSATISFIABLE","Reason":"Field 'boolField' is already being materialized as endpoint type 'BIT' but endpoint type 'BIGINT' is required by its schema '{ type: [integer] }'"} +{"Field":"flow_document","Type":3,"TypeString":"LOCATION_RECOMMENDED","Reason":"This location is part of the current materialization"} +{"Field":"flow_published_at","Type":3,"TypeString":"LOCATION_RECOMMENDED","Reason":"This location is part of the current materialization"} +{"Field":"intField","Type":3,"TypeString":"LOCATION_RECOMMENDED","Reason":"This location is part of the current materialization"} +{"Field":"key","Type":1,"TypeString":"FIELD_REQUIRED","Reason":"This field is a key in the current materialization"} +{"Field":"multipleField","Type":3,"TypeString":"LOCATION_RECOMMENDED","Reason":"This location is part of the current materialization"} +{"Field":"nullField","Type":4,"TypeString":"FIELD_OPTIONAL","Reason":"Object fields may be 
materialized"} +{"Field":"numField","Type":6,"TypeString":"UNSATISFIABLE","Reason":"Field 'numField' is already being materialized as endpoint type 'FLOAT' but endpoint type 'BIT' is required by its schema '{ type: [boolean] }'"} +{"Field":"objField","Type":3,"TypeString":"LOCATION_RECOMMENDED","Reason":"This location is part of the current materialization"} +{"Field":"stringDateField","Type":3,"TypeString":"LOCATION_RECOMMENDED","Reason":"This location is part of the current materialization"} +{"Field":"stringDateTimeField","Type":3,"TypeString":"LOCATION_RECOMMENDED","Reason":"This location is part of the current materialization"} +{"Field":"stringDurationField","Type":3,"TypeString":"LOCATION_RECOMMENDED","Reason":"This location is part of the current materialization"} +{"Field":"stringEmailField","Type":3,"TypeString":"LOCATION_RECOMMENDED","Reason":"This location is part of the current materialization"} +{"Field":"stringField","Type":6,"TypeString":"UNSATISFIABLE","Reason":"Field 'stringField' is already being materialized as endpoint type 'VARCHAR' but endpoint type 'BIGINT' is required by its schema '{ type: [integer] }'"} +{"Field":"stringHostnameField","Type":3,"TypeString":"LOCATION_RECOMMENDED","Reason":"This location is part of the current materialization"} +{"Field":"stringIdnEmailField","Type":3,"TypeString":"LOCATION_RECOMMENDED","Reason":"This location is part of the current materialization"} +{"Field":"stringIdnHostnameField","Type":3,"TypeString":"LOCATION_RECOMMENDED","Reason":"This location is part of the current materialization"} +{"Field":"stringIntegerField","Type":3,"TypeString":"LOCATION_RECOMMENDED","Reason":"This location is part of the current materialization"} +{"Field":"stringIpv4Field","Type":3,"TypeString":"LOCATION_RECOMMENDED","Reason":"This location is part of the current materialization"} +{"Field":"stringIpv6Field","Type":3,"TypeString":"LOCATION_RECOMMENDED","Reason":"This location is part of the current materialization"} +{"Field":"stringIriField","Type":3,"TypeString":"LOCATION_RECOMMENDED","Reason":"This location is part of the current materialization"} +{"Field":"stringIriReferenceField","Type":3,"TypeString":"LOCATION_RECOMMENDED","Reason":"This location is part of the current materialization"} +{"Field":"stringJsonPointerField","Type":3,"TypeString":"LOCATION_RECOMMENDED","Reason":"This location is part of the current materialization"} +{"Field":"stringMacAddr8Field","Type":3,"TypeString":"LOCATION_RECOMMENDED","Reason":"This location is part of the current materialization"} +{"Field":"stringMacAddrField","Type":3,"TypeString":"LOCATION_RECOMMENDED","Reason":"This location is part of the current materialization"} +{"Field":"stringNumberField","Type":3,"TypeString":"LOCATION_RECOMMENDED","Reason":"This location is part of the current materialization"} +{"Field":"stringRegexField","Type":3,"TypeString":"LOCATION_RECOMMENDED","Reason":"This location is part of the current materialization"} +{"Field":"stringRelativeJsonPointerField","Type":3,"TypeString":"LOCATION_RECOMMENDED","Reason":"This location is part of the current materialization"} +{"Field":"stringTimeField","Type":3,"TypeString":"LOCATION_RECOMMENDED","Reason":"This location is part of the current materialization"} +{"Field":"stringUint32Field","Type":3,"TypeString":"LOCATION_RECOMMENDED","Reason":"This location is part of the current materialization"} +{"Field":"stringUint64Field","Type":3,"TypeString":"LOCATION_RECOMMENDED","Reason":"This location is part of the current materialization"} 
+{"Field":"stringUriField","Type":3,"TypeString":"LOCATION_RECOMMENDED","Reason":"This location is part of the current materialization"} +{"Field":"stringUriReferenceField","Type":3,"TypeString":"LOCATION_RECOMMENDED","Reason":"This location is part of the current materialization"} +{"Field":"stringUriTemplateField","Type":3,"TypeString":"LOCATION_RECOMMENDED","Reason":"This location is part of the current materialization"} +{"Field":"stringUuidField","Type":3,"TypeString":"LOCATION_RECOMMENDED","Reason":"This location is part of the current materialization"} + +Big Schema Materialized Resource Schema With All Fields Required: +{"Name":"_meta/flow_truncated","Nullable":"YES","Type":"bit"} +{"Name":"arrayField","Nullable":"YES","Type":"varchar"} +{"Name":"boolField","Nullable":"YES","Type":"bit"} +{"Name":"flow_document","Nullable":"YES","Type":"varchar"} +{"Name":"flow_published_at","Nullable":"YES","Type":"datetime2"} +{"Name":"intField","Nullable":"YES","Type":"bigint"} +{"Name":"key","Nullable":"YES","Type":"varchar"} +{"Name":"multipleField","Nullable":"YES","Type":"varchar"} +{"Name":"numField","Nullable":"YES","Type":"float"} +{"Name":"objField","Nullable":"YES","Type":"varchar"} +{"Name":"stringDateField","Nullable":"YES","Type":"date"} +{"Name":"stringDateTimeField","Nullable":"YES","Type":"datetime2"} +{"Name":"stringDurationField","Nullable":"YES","Type":"varchar"} +{"Name":"stringEmailField","Nullable":"YES","Type":"varchar"} +{"Name":"stringField","Nullable":"YES","Type":"varchar"} +{"Name":"stringHostnameField","Nullable":"YES","Type":"varchar"} +{"Name":"stringIdnEmailField","Nullable":"YES","Type":"varchar"} +{"Name":"stringIdnHostnameField","Nullable":"YES","Type":"varchar"} +{"Name":"stringIntegerField","Nullable":"YES","Type":"decimal"} +{"Name":"stringIpv4Field","Nullable":"YES","Type":"varchar"} +{"Name":"stringIpv6Field","Nullable":"YES","Type":"varchar"} +{"Name":"stringIriField","Nullable":"YES","Type":"varchar"} +{"Name":"stringIriReferenceField","Nullable":"YES","Type":"varchar"} +{"Name":"stringJsonPointerField","Nullable":"YES","Type":"varchar"} +{"Name":"stringMacAddr8Field","Nullable":"YES","Type":"varchar"} +{"Name":"stringMacAddrField","Nullable":"YES","Type":"varchar"} +{"Name":"stringNumberField","Nullable":"YES","Type":"float"} +{"Name":"stringRegexField","Nullable":"YES","Type":"varchar"} +{"Name":"stringRelativeJsonPointerField","Nullable":"YES","Type":"varchar"} +{"Name":"stringTimeField","Nullable":"YES","Type":"time"} +{"Name":"stringUint32Field","Nullable":"YES","Type":"decimal"} +{"Name":"stringUint64Field","Nullable":"YES","Type":"decimal"} +{"Name":"stringUriField","Nullable":"YES","Type":"varchar"} +{"Name":"stringUriReferenceField","Nullable":"YES","Type":"varchar"} +{"Name":"stringUriTemplateField","Nullable":"YES","Type":"varchar"} +{"Name":"stringUuidField","Nullable":"YES","Type":"varchar"} + +Big Schema Materialized Resource Schema With No Fields Required: +{"Name":"_meta/flow_truncated","Nullable":"YES","Type":"bit"} +{"Name":"arrayField","Nullable":"YES","Type":"varchar"} +{"Name":"boolField","Nullable":"YES","Type":"bit"} +{"Name":"flow_document","Nullable":"YES","Type":"varchar"} +{"Name":"flow_published_at","Nullable":"YES","Type":"datetime2"} +{"Name":"intField","Nullable":"YES","Type":"bigint"} +{"Name":"key","Nullable":"YES","Type":"varchar"} +{"Name":"multipleField","Nullable":"YES","Type":"varchar"} +{"Name":"numField","Nullable":"YES","Type":"float"} +{"Name":"objField","Nullable":"YES","Type":"varchar"} 
+{"Name":"stringDateField","Nullable":"YES","Type":"date"} +{"Name":"stringDateTimeField","Nullable":"YES","Type":"datetime2"} +{"Name":"stringDurationField","Nullable":"YES","Type":"varchar"} +{"Name":"stringEmailField","Nullable":"YES","Type":"varchar"} +{"Name":"stringField","Nullable":"YES","Type":"varchar"} +{"Name":"stringHostnameField","Nullable":"YES","Type":"varchar"} +{"Name":"stringIdnEmailField","Nullable":"YES","Type":"varchar"} +{"Name":"stringIdnHostnameField","Nullable":"YES","Type":"varchar"} +{"Name":"stringIntegerField","Nullable":"YES","Type":"decimal"} +{"Name":"stringIpv4Field","Nullable":"YES","Type":"varchar"} +{"Name":"stringIpv6Field","Nullable":"YES","Type":"varchar"} +{"Name":"stringIriField","Nullable":"YES","Type":"varchar"} +{"Name":"stringIriReferenceField","Nullable":"YES","Type":"varchar"} +{"Name":"stringJsonPointerField","Nullable":"YES","Type":"varchar"} +{"Name":"stringMacAddr8Field","Nullable":"YES","Type":"varchar"} +{"Name":"stringMacAddrField","Nullable":"YES","Type":"varchar"} +{"Name":"stringNumberField","Nullable":"YES","Type":"float"} +{"Name":"stringRegexField","Nullable":"YES","Type":"varchar"} +{"Name":"stringRelativeJsonPointerField","Nullable":"YES","Type":"varchar"} +{"Name":"stringTimeField","Nullable":"YES","Type":"time"} +{"Name":"stringUint32Field","Nullable":"YES","Type":"decimal"} +{"Name":"stringUint64Field","Nullable":"YES","Type":"decimal"} +{"Name":"stringUriField","Nullable":"YES","Type":"varchar"} +{"Name":"stringUriReferenceField","Nullable":"YES","Type":"varchar"} +{"Name":"stringUriTemplateField","Nullable":"YES","Type":"varchar"} +{"Name":"stringUuidField","Nullable":"YES","Type":"varchar"} + +Big Schema Changed Types With Table Replacement Constraints: +{"Field":"_meta/flow_truncated","Type":4,"TypeString":"FIELD_OPTIONAL","Reason":"Metadata fields are able to be materialized"} +{"Field":"arrayField","Type":4,"TypeString":"FIELD_OPTIONAL","Reason":"Object fields may be materialized"} +{"Field":"boolField","Type":3,"TypeString":"LOCATION_RECOMMENDED","Reason":"The projection has a single scalar type"} +{"Field":"flow_document","Type":3,"TypeString":"LOCATION_RECOMMENDED","Reason":"The root document should usually be materialized"} +{"Field":"flow_published_at","Type":3,"TypeString":"LOCATION_RECOMMENDED","Reason":"The projection has a single scalar type"} +{"Field":"intField","Type":3,"TypeString":"LOCATION_RECOMMENDED","Reason":"The projection has a single scalar type"} +{"Field":"key","Type":2,"TypeString":"LOCATION_REQUIRED","Reason":"All Locations that are part of the collections key are required"} +{"Field":"multipleField","Type":3,"TypeString":"LOCATION_RECOMMENDED","Reason":"This field is able to be materialized"} +{"Field":"nullField","Type":4,"TypeString":"FIELD_OPTIONAL","Reason":"Object fields may be materialized"} +{"Field":"numField","Type":3,"TypeString":"LOCATION_RECOMMENDED","Reason":"The projection has a single scalar type"} +{"Field":"objField","Type":3,"TypeString":"LOCATION_RECOMMENDED","Reason":"The projection has a single scalar type"} +{"Field":"stringDateField","Type":3,"TypeString":"LOCATION_RECOMMENDED","Reason":"The projection has a single scalar type"} +{"Field":"stringDateTimeField","Type":3,"TypeString":"LOCATION_RECOMMENDED","Reason":"The projection has a single scalar type"} +{"Field":"stringDurationField","Type":3,"TypeString":"LOCATION_RECOMMENDED","Reason":"The projection has a single scalar type"} +{"Field":"stringEmailField","Type":3,"TypeString":"LOCATION_RECOMMENDED","Reason":"The 
projection has a single scalar type"} +{"Field":"stringField","Type":3,"TypeString":"LOCATION_RECOMMENDED","Reason":"The projection has a single scalar type"} +{"Field":"stringHostnameField","Type":3,"TypeString":"LOCATION_RECOMMENDED","Reason":"The projection has a single scalar type"} +{"Field":"stringIdnEmailField","Type":3,"TypeString":"LOCATION_RECOMMENDED","Reason":"The projection has a single scalar type"} +{"Field":"stringIdnHostnameField","Type":3,"TypeString":"LOCATION_RECOMMENDED","Reason":"The projection has a single scalar type"} +{"Field":"stringIntegerField","Type":3,"TypeString":"LOCATION_RECOMMENDED","Reason":"The projection has a single scalar type"} +{"Field":"stringIpv4Field","Type":3,"TypeString":"LOCATION_RECOMMENDED","Reason":"The projection has a single scalar type"} +{"Field":"stringIpv6Field","Type":3,"TypeString":"LOCATION_RECOMMENDED","Reason":"The projection has a single scalar type"} +{"Field":"stringIriField","Type":3,"TypeString":"LOCATION_RECOMMENDED","Reason":"The projection has a single scalar type"} +{"Field":"stringIriReferenceField","Type":3,"TypeString":"LOCATION_RECOMMENDED","Reason":"The projection has a single scalar type"} +{"Field":"stringJsonPointerField","Type":3,"TypeString":"LOCATION_RECOMMENDED","Reason":"The projection has a single scalar type"} +{"Field":"stringMacAddr8Field","Type":3,"TypeString":"LOCATION_RECOMMENDED","Reason":"The projection has a single scalar type"} +{"Field":"stringMacAddrField","Type":3,"TypeString":"LOCATION_RECOMMENDED","Reason":"The projection has a single scalar type"} +{"Field":"stringNumberField","Type":3,"TypeString":"LOCATION_RECOMMENDED","Reason":"The projection has a single scalar type"} +{"Field":"stringRegexField","Type":3,"TypeString":"LOCATION_RECOMMENDED","Reason":"The projection has a single scalar type"} +{"Field":"stringRelativeJsonPointerField","Type":3,"TypeString":"LOCATION_RECOMMENDED","Reason":"The projection has a single scalar type"} +{"Field":"stringTimeField","Type":3,"TypeString":"LOCATION_RECOMMENDED","Reason":"The projection has a single scalar type"} +{"Field":"stringUint32Field","Type":3,"TypeString":"LOCATION_RECOMMENDED","Reason":"The projection has a single scalar type"} +{"Field":"stringUint64Field","Type":3,"TypeString":"LOCATION_RECOMMENDED","Reason":"The projection has a single scalar type"} +{"Field":"stringUriField","Type":3,"TypeString":"LOCATION_RECOMMENDED","Reason":"The projection has a single scalar type"} +{"Field":"stringUriReferenceField","Type":3,"TypeString":"LOCATION_RECOMMENDED","Reason":"The projection has a single scalar type"} +{"Field":"stringUriTemplateField","Type":3,"TypeString":"LOCATION_RECOMMENDED","Reason":"The projection has a single scalar type"} +{"Field":"stringUuidField","Type":3,"TypeString":"LOCATION_RECOMMENDED","Reason":"The projection has a single scalar type"} + +Big Schema Materialized Resource Schema Changed Types With Table Replacement: +{"Name":"_meta/flow_truncated","Nullable":"YES","Type":"bit"} +{"Name":"arrayField","Nullable":"YES","Type":"varchar"} +{"Name":"boolField","Nullable":"YES","Type":"bigint"} +{"Name":"flow_document","Nullable":"YES","Type":"varchar"} +{"Name":"flow_published_at","Nullable":"YES","Type":"datetime2"} +{"Name":"intField","Nullable":"YES","Type":"varchar"} +{"Name":"key","Nullable":"YES","Type":"varchar"} +{"Name":"multipleField","Nullable":"YES","Type":"varchar"} +{"Name":"nullField","Nullable":"YES","Type":"varchar"} +{"Name":"numField","Nullable":"YES","Type":"bit"} 
+{"Name":"objField","Nullable":"YES","Type":"varchar"} +{"Name":"stringDateField","Nullable":"YES","Type":"varchar"} +{"Name":"stringDateTimeField","Nullable":"YES","Type":"varchar"} +{"Name":"stringDurationField","Nullable":"YES","Type":"varchar"} +{"Name":"stringEmailField","Nullable":"YES","Type":"varchar"} +{"Name":"stringField","Nullable":"YES","Type":"bigint"} +{"Name":"stringHostnameField","Nullable":"YES","Type":"varchar"} +{"Name":"stringIdnEmailField","Nullable":"YES","Type":"varchar"} +{"Name":"stringIdnHostnameField","Nullable":"YES","Type":"varchar"} +{"Name":"stringIntegerField","Nullable":"YES","Type":"varchar"} +{"Name":"stringIpv4Field","Nullable":"YES","Type":"varchar"} +{"Name":"stringIpv6Field","Nullable":"YES","Type":"varchar"} +{"Name":"stringIriField","Nullable":"YES","Type":"varchar"} +{"Name":"stringIriReferenceField","Nullable":"YES","Type":"varchar"} +{"Name":"stringJsonPointerField","Nullable":"YES","Type":"varchar"} +{"Name":"stringMacAddr8Field","Nullable":"YES","Type":"varchar"} +{"Name":"stringMacAddrField","Nullable":"YES","Type":"varchar"} +{"Name":"stringNumberField","Nullable":"YES","Type":"varchar"} +{"Name":"stringRegexField","Nullable":"YES","Type":"varchar"} +{"Name":"stringRelativeJsonPointerField","Nullable":"YES","Type":"varchar"} +{"Name":"stringTimeField","Nullable":"YES","Type":"varchar"} +{"Name":"stringUint32Field","Nullable":"YES","Type":"varchar"} +{"Name":"stringUint64Field","Nullable":"YES","Type":"varchar"} +{"Name":"stringUriField","Nullable":"YES","Type":"varchar"} +{"Name":"stringUriReferenceField","Nullable":"YES","Type":"varchar"} +{"Name":"stringUriTemplateField","Nullable":"YES","Type":"varchar"} +{"Name":"stringUuidField","Nullable":"YES","Type":"varchar"} + +add a single field: +{"Name":"_meta/flow_truncated","Nullable":"YES","Type":"bit"} +{"Name":"addedOptionalString","Nullable":"YES","Type":"varchar"} +{"Name":"flow_document","Nullable":"YES","Type":"varchar"} +{"Name":"flow_published_at","Nullable":"YES","Type":"datetime2"} +{"Name":"key","Nullable":"YES","Type":"varchar"} +{"Name":"optionalBoolean","Nullable":"YES","Type":"bit"} +{"Name":"optionalInteger","Nullable":"YES","Type":"bigint"} +{"Name":"optionalObject","Nullable":"YES","Type":"varchar"} +{"Name":"optionalString","Nullable":"YES","Type":"varchar"} +{"Name":"requiredBoolean","Nullable":"YES","Type":"bit"} +{"Name":"requiredInteger","Nullable":"YES","Type":"bigint"} +{"Name":"requiredObject","Nullable":"YES","Type":"varchar"} +{"Name":"requiredString","Nullable":"YES","Type":"varchar"} +{"Name":"second_root","Nullable":"YES","Type":"varchar"} + +remove a single optional field: +{"Name":"_meta/flow_truncated","Nullable":"YES","Type":"bit"} +{"Name":"flow_document","Nullable":"YES","Type":"varchar"} +{"Name":"flow_published_at","Nullable":"YES","Type":"datetime2"} +{"Name":"key","Nullable":"YES","Type":"varchar"} +{"Name":"optionalBoolean","Nullable":"YES","Type":"bit"} +{"Name":"optionalInteger","Nullable":"YES","Type":"bigint"} +{"Name":"optionalObject","Nullable":"YES","Type":"varchar"} +{"Name":"optionalString","Nullable":"YES","Type":"varchar"} +{"Name":"requiredBoolean","Nullable":"YES","Type":"bit"} +{"Name":"requiredInteger","Nullable":"YES","Type":"bigint"} +{"Name":"requiredObject","Nullable":"YES","Type":"varchar"} +{"Name":"requiredString","Nullable":"YES","Type":"varchar"} +{"Name":"second_root","Nullable":"YES","Type":"varchar"} + +remove a single required field: +{"Name":"_meta/flow_truncated","Nullable":"YES","Type":"bit"} 
+{"Name":"flow_document","Nullable":"YES","Type":"varchar"} +{"Name":"flow_published_at","Nullable":"YES","Type":"datetime2"} +{"Name":"key","Nullable":"YES","Type":"varchar"} +{"Name":"optionalBoolean","Nullable":"YES","Type":"bit"} +{"Name":"optionalInteger","Nullable":"YES","Type":"bigint"} +{"Name":"optionalObject","Nullable":"YES","Type":"varchar"} +{"Name":"optionalString","Nullable":"YES","Type":"varchar"} +{"Name":"requiredBoolean","Nullable":"YES","Type":"bit"} +{"Name":"requiredInteger","Nullable":"YES","Type":"bigint"} +{"Name":"requiredObject","Nullable":"YES","Type":"varchar"} +{"Name":"requiredString","Nullable":"YES","Type":"varchar"} +{"Name":"second_root","Nullable":"YES","Type":"varchar"} + +add and remove many fields: +{"Name":"_meta/flow_truncated","Nullable":"YES","Type":"bit"} +{"Name":"addedOptionalString","Nullable":"YES","Type":"varchar"} +{"Name":"addedRequiredString","Nullable":"YES","Type":"varchar"} +{"Name":"flow_document","Nullable":"YES","Type":"varchar"} +{"Name":"flow_published_at","Nullable":"YES","Type":"datetime2"} +{"Name":"key","Nullable":"YES","Type":"varchar"} +{"Name":"optionalBoolean","Nullable":"YES","Type":"bit"} +{"Name":"optionalInteger","Nullable":"YES","Type":"bigint"} +{"Name":"optionalObject","Nullable":"YES","Type":"varchar"} +{"Name":"optionalString","Nullable":"YES","Type":"varchar"} +{"Name":"requiredBoolean","Nullable":"YES","Type":"bit"} +{"Name":"requiredInteger","Nullable":"YES","Type":"bigint"} +{"Name":"requiredObject","Nullable":"YES","Type":"varchar"} +{"Name":"requiredString","Nullable":"YES","Type":"varchar"} +{"Name":"second_root","Nullable":"YES","Type":"varchar"} + +Challenging Field Names Materialized Columns: +{"Name":" ,;{}().- problematicKey � 𐀀 嶲 ","Nullable":"YES","Type":"varchar"} +{"Name":" ,;{}().- problematicValue � 𐀀 嶲 ","Nullable":"YES","Type":"varchar"} +{"Name":"$dollar$signs","Nullable":"YES","Type":"varchar"} +{"Name":"123","Nullable":"YES","Type":"varchar"} +{"Name":"123startsWithDigits","Nullable":"YES","Type":"varchar"} +{"Name":"_id","Nullable":"YES","Type":"varchar"} +{"Name":"a\"string`with`quote'characters","Nullable":"YES","Type":"varchar"} +{"Name":"flow_document","Nullable":"YES","Type":"varchar"} +{"Name":"flow_published_at","Nullable":"YES","Type":"datetime2"} +{"Name":"value with separated words","Nullable":"YES","Type":"varchar"} +{"Name":"value-with-separated-words","Nullable":"YES","Type":"varchar"} +{"Name":"value.with-separated_words","Nullable":"YES","Type":"varchar"} +{"Name":"value.with.separated.words","Nullable":"YES","Type":"varchar"} +{"Name":"value_with_separated_words","Nullable":"YES","Type":"varchar"} + diff --git a/materialize-azure-fabric-warehouse/.snapshots/TestValidateAndApplyMigrations b/materialize-azure-fabric-warehouse/.snapshots/TestValidateAndApplyMigrations new file mode 100644 index 0000000000..508d6ad049 --- /dev/null +++ b/materialize-azure-fabric-warehouse/.snapshots/TestValidateAndApplyMigrations @@ -0,0 +1,90 @@ +Base Initial Constraints: +{"Field":"_meta/flow_truncated","Type":4,"TypeString":"FIELD_OPTIONAL","Reason":"Metadata fields are able to be materialized"} +{"Field":"boolWidenedToJson","Type":3,"TypeString":"LOCATION_RECOMMENDED","Reason":"The projection has a single scalar type"} +{"Field":"dateValue","Type":3,"TypeString":"LOCATION_RECOMMENDED","Reason":"The projection has a single scalar type"} +{"Field":"datetimeValue","Type":3,"TypeString":"LOCATION_RECOMMENDED","Reason":"The projection has a single scalar type"} 
+{"Field":"flow_document","Type":2,"TypeString":"LOCATION_REQUIRED","Reason":"The root document must be materialized"} +{"Field":"flow_published_at","Type":3,"TypeString":"LOCATION_RECOMMENDED","Reason":"The projection has a single scalar type"} +{"Field":"int64","Type":3,"TypeString":"LOCATION_RECOMMENDED","Reason":"The projection has a single scalar type"} +{"Field":"intWidenedToJson","Type":3,"TypeString":"LOCATION_RECOMMENDED","Reason":"The projection has a single scalar type"} +{"Field":"key","Type":2,"TypeString":"LOCATION_REQUIRED","Reason":"All Locations that are part of the collections key are required"} +{"Field":"multiple","Type":3,"TypeString":"LOCATION_RECOMMENDED","Reason":"This field is able to be materialized"} +{"Field":"nonScalarValue","Type":4,"TypeString":"FIELD_OPTIONAL","Reason":"Object fields may be materialized"} +{"Field":"nullValue","Type":5,"TypeString":"FIELD_FORBIDDEN","Reason":"Cannot materialize a field where the only possible type is 'null'"} +{"Field":"numericString","Type":3,"TypeString":"LOCATION_RECOMMENDED","Reason":"The projection has a single scalar type"} +{"Field":"optional","Type":4,"TypeString":"FIELD_OPTIONAL","Reason":"Object fields may be materialized"} +{"Field":"requiredNumeric","Type":3,"TypeString":"LOCATION_RECOMMENDED","Reason":"The projection has a single scalar type"} +{"Field":"scalarValue","Type":3,"TypeString":"LOCATION_RECOMMENDED","Reason":"The projection has a single scalar type"} +{"Field":"second_root","Type":5,"TypeString":"FIELD_FORBIDDEN","Reason":"Only a single root document projection can be materialized for standard updates"} +{"Field":"stringWidenedToJson","Type":3,"TypeString":"LOCATION_RECOMMENDED","Reason":"The projection has a single scalar type"} +{"Field":"timeValue","Type":3,"TypeString":"LOCATION_RECOMMENDED","Reason":"The projection has a single scalar type"} + +Migratable Changes Before Apply Schema: +{"Name":"_meta/flow_truncated","Nullable":"YES","Type":"bit"} +{"Name":"boolWidenedToJson","Nullable":"YES","Type":"bit"} +{"Name":"dateValue","Nullable":"YES","Type":"date"} +{"Name":"datetimeValue","Nullable":"YES","Type":"datetime2"} +{"Name":"flow_document","Nullable":"YES","Type":"varchar"} +{"Name":"flow_published_at","Nullable":"YES","Type":"datetime2"} +{"Name":"int64","Nullable":"YES","Type":"bigint"} +{"Name":"intWidenedToJson","Nullable":"YES","Type":"bigint"} +{"Name":"key","Nullable":"YES","Type":"varchar"} +{"Name":"multiple","Nullable":"YES","Type":"varchar"} +{"Name":"nonScalarValue","Nullable":"YES","Type":"varchar"} +{"Name":"numericString","Nullable":"YES","Type":"decimal"} +{"Name":"optional","Nullable":"YES","Type":"varchar"} +{"Name":"requiredNumeric","Nullable":"YES","Type":"decimal"} +{"Name":"scalarValue","Nullable":"YES","Type":"varchar"} +{"Name":"stringWidenedToJson","Nullable":"YES","Type":"varchar"} +{"Name":"timeValue","Nullable":"YES","Type":"time"} + + +Migratable Changes Before Apply Data: +key (NVARCHAR), _meta/flow_truncated (BIT), boolWidenedToJson (BIT), dateValue (DATE), datetimeValue (DATETIME2), flow_published_at (DATETIME2), int64 (BIGINT), intWidenedToJson (BIGINT), multiple (NVARCHAR), nonScalarValue (NVARCHAR), numericString (DECIMAL), optional (NVARCHAR), requiredNumeric (DECIMAL), scalarValue (NVARCHAR), stringWidenedToJson (NVARCHAR), timeValue (TIME), flow_document (NVARCHAR) +1, false, true, 2024-01-01T00:00:00Z, 2024-01-01T01:01:01.111111Z, 2024-09-13T01:01:01Z, 1, 999, , , 123, , 456, test, hello, 0001-01-01T01:01:01Z, {} + +Migratable Changes Constraints: 
+{"Field":"_meta/flow_truncated","Type":3,"TypeString":"LOCATION_RECOMMENDED","Reason":"This location is part of the current materialization"} +{"Field":"boolWidenedToJson","Type":3,"TypeString":"LOCATION_RECOMMENDED","Reason":"This location is part of the current materialization"} +{"Field":"dateValue","Type":3,"TypeString":"LOCATION_RECOMMENDED","Reason":"This location is part of the current materialization"} +{"Field":"datetimeValue","Type":3,"TypeString":"LOCATION_RECOMMENDED","Reason":"This location is part of the current materialization"} +{"Field":"flow_document","Type":1,"TypeString":"FIELD_REQUIRED","Reason":"This field is the document in the current materialization"} +{"Field":"flow_published_at","Type":3,"TypeString":"LOCATION_RECOMMENDED","Reason":"This location is part of the current materialization"} +{"Field":"int64","Type":3,"TypeString":"LOCATION_RECOMMENDED","Reason":"This location is part of the current materialization"} +{"Field":"intWidenedToJson","Type":3,"TypeString":"LOCATION_RECOMMENDED","Reason":"This location is part of the current materialization"} +{"Field":"key","Type":1,"TypeString":"FIELD_REQUIRED","Reason":"This field is a key in the current materialization"} +{"Field":"multiple","Type":3,"TypeString":"LOCATION_RECOMMENDED","Reason":"This location is part of the current materialization"} +{"Field":"nonScalarValue","Type":3,"TypeString":"LOCATION_RECOMMENDED","Reason":"This location is part of the current materialization"} +{"Field":"nullValue","Type":5,"TypeString":"FIELD_FORBIDDEN","Reason":"Cannot materialize a field where the only possible type is 'null'"} +{"Field":"numericString","Type":3,"TypeString":"LOCATION_RECOMMENDED","Reason":"This location is part of the current materialization"} +{"Field":"optional","Type":3,"TypeString":"LOCATION_RECOMMENDED","Reason":"This location is part of the current materialization"} +{"Field":"requiredNumeric","Type":3,"TypeString":"LOCATION_RECOMMENDED","Reason":"This location is part of the current materialization"} +{"Field":"scalarValue","Type":3,"TypeString":"LOCATION_RECOMMENDED","Reason":"This location is part of the current materialization"} +{"Field":"second_root","Type":5,"TypeString":"FIELD_FORBIDDEN","Reason":"Cannot materialize root document projection 'second_root' because field 'flow_document' is already being materialized as the document"} +{"Field":"stringWidenedToJson","Type":3,"TypeString":"LOCATION_RECOMMENDED","Reason":"This location is part of the current materialization"} +{"Field":"timeValue","Type":3,"TypeString":"LOCATION_RECOMMENDED","Reason":"This location is part of the current materialization"} + +Migratable Changes Applied Schema: +{"Name":"_meta/flow_truncated","Nullable":"YES","Type":"bit"} +{"Name":"boolWidenedToJson","Nullable":"YES","Type":"varchar"} +{"Name":"dateValue","Nullable":"YES","Type":"varchar"} +{"Name":"datetimeValue","Nullable":"YES","Type":"varchar"} +{"Name":"flow_document","Nullable":"YES","Type":"varchar"} +{"Name":"flow_published_at","Nullable":"YES","Type":"datetime2"} +{"Name":"int64","Nullable":"YES","Type":"decimal"} +{"Name":"intWidenedToJson","Nullable":"YES","Type":"varchar"} +{"Name":"key","Nullable":"YES","Type":"varchar"} +{"Name":"multiple","Nullable":"YES","Type":"varchar"} +{"Name":"nonScalarValue","Nullable":"YES","Type":"varchar"} +{"Name":"numericString","Nullable":"YES","Type":"varchar"} +{"Name":"optional","Nullable":"YES","Type":"varchar"} +{"Name":"requiredNumeric","Nullable":"YES","Type":"varchar"} 
+{"Name":"scalarValue","Nullable":"YES","Type":"varchar"} +{"Name":"stringWidenedToJson","Nullable":"YES","Type":"varchar"} +{"Name":"timeValue","Nullable":"YES","Type":"varchar"} + + +Migratable Changes Applied Data: +key (NVARCHAR), _meta/flow_truncated (BIT), boolWidenedToJson (NVARCHAR), dateValue (NVARCHAR), datetimeValue (NVARCHAR), flow_published_at (DATETIME2), int64 (DECIMAL), intWidenedToJson (NVARCHAR), multiple (NVARCHAR), nonScalarValue (NVARCHAR), numericString (NVARCHAR), optional (NVARCHAR), requiredNumeric (NVARCHAR), scalarValue (NVARCHAR), stringWidenedToJson (NVARCHAR), timeValue (NVARCHAR), flow_document (NVARCHAR) +1, false, true, 2024-01-01, 2024-01-01 01:01:01.111111, 2024-09-13T01:01:01Z, 1, 999, , , 123, , 456, test, hello, 01:01:01.000000, {} + diff --git a/materialize-azure-fabric-warehouse/Dockerfile b/materialize-azure-fabric-warehouse/Dockerfile new file mode 100644 index 0000000000..553fe4d07f --- /dev/null +++ b/materialize-azure-fabric-warehouse/Dockerfile @@ -0,0 +1,38 @@ +ARG BASE_IMAGE=ghcr.io/estuary/base-image:v1 + +# Build Stage +################################################################################ +FROM --platform=linux/amd64 golang:1.22-bullseye as builder + +WORKDIR /builder + +# Download & compile dependencies early. Doing this separately allows for layer +# caching opportunities when no dependencies are updated. +COPY go.* ./ +RUN go mod download + +COPY go ./go +COPY materialize-boilerplate ./materialize-boilerplate +COPY materialize-azure-fabric-warehouse ./materialize-azure-fabric-warehouse +COPY materialize-sql ./materialize-sql + +# Test and build the connector. +RUN go test -tags nozstd -v ./materialize-sql/... +RUN go test -tags nozstd -v ./materialize-azure-fabric-warehouse/... +RUN go build -tags nozstd -v -o ./connector ./materialize-azure-fabric-warehouse/... + +# Runtime Stage +################################################################################ +FROM ${BASE_IMAGE} + +WORKDIR /connector +ENV PATH="/connector:$PATH" + +COPY --from=builder /builder/connector ./materialize-azure-fabric-warehouse + +# Avoid running the connector as root. 
+USER nonroot:nonroot + +LABEL FLOW_RUNTIME_PROTOCOL=materialize + +ENTRYPOINT ["/connector/materialize-azure-fabric-warehouse"] diff --git a/materialize-azure-fabric-warehouse/VERSION b/materialize-azure-fabric-warehouse/VERSION new file mode 100644 index 0000000000..626799f0f8 --- /dev/null +++ b/materialize-azure-fabric-warehouse/VERSION @@ -0,0 +1 @@ +v1 diff --git a/materialize-azure-fabric-warehouse/client.go b/materialize-azure-fabric-warehouse/client.go new file mode 100644 index 0000000000..f67c6253c8 --- /dev/null +++ b/materialize-azure-fabric-warehouse/client.go @@ -0,0 +1,447 @@ +package main + +import ( + "context" + stdsql "database/sql" + "encoding/base64" + "errors" + "fmt" + "net" + "path" + "slices" + "strings" + "time" + + "github.com/Azure/azure-sdk-for-go/sdk/azidentity" + boilerplate "github.com/estuary/connectors/materialize-boilerplate" + sql "github.com/estuary/connectors/materialize-sql" + "github.com/google/uuid" + "github.com/segmentio/encoding/json" + log "github.com/sirupsen/logrus" +) + +var _ sql.SchemaManager = (*client)(nil) + +type client struct { + db *stdsql.DB + cfg *config + ep *sql.Endpoint +} + +func newClient(ctx context.Context, ep *sql.Endpoint) (sql.Client, error) { + cfg := ep.Config.(*config) + + db, err := cfg.db() + if err != nil { + return nil, err + } + + return &client{ + db: db, + cfg: cfg, + ep: ep, + }, nil +} + +func (c *client) InfoSchema(ctx context.Context, resourcePaths [][]string) (*boilerplate.InfoSchema, error) { + // The body of this function is a copy of sql.StdFetchInfoSchema, except the + // identifiers for the information schema views need to be in capital + // letters for Fabric Warehouse. Ideally this could be replaced at some + // point with REST API calls if the necessary REST endpoints are added to + // Fabric Warehouse; right now the only available endpoints list + // warehouses, and they don't even work with service principal + // authentication.
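+ // For example, the queries below reference INFORMATION_SCHEMA.TABLES and
+ // INFORMATION_SCHEMA.COLUMNS; Fabric Warehouse will not resolve the
+ // lowercase information_schema names that sql.StdFetchInfoSchema uses.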
+ is := boilerplate.NewInfoSchema( + sql.ToLocatePathFn(dialect.TableLocator), + dialect.ColumnLocator, + ) + + if len(resourcePaths) == 0 { + return is, nil + } + + schemas := make([]string, 0, len(resourcePaths)) + for _, p := range resourcePaths { + loc := dialect.TableLocator(p) + schemas = append(schemas, dialect.Literal(loc.TableSchema)) + } + + slices.Sort(schemas) + schemas = slices.Compact(schemas) + + tables, err := c.db.QueryContext(ctx, fmt.Sprintf(` + select table_schema, table_name + from INFORMATION_SCHEMA.TABLES + where table_catalog = %s + and table_schema in (%s); + `, + dialect.Literal(c.cfg.Warehouse), + strings.Join(schemas, ","), + )) + if err != nil { + return nil, err + } + defer tables.Close() + + type tableRow struct { + TableSchema string + TableName string + } + + for tables.Next() { + var t tableRow + if err := tables.Scan(&t.TableSchema, &t.TableName); err != nil { + return nil, err + } + + is.PushResource(t.TableSchema, t.TableName) + } + + columns, err := c.db.QueryContext(ctx, fmt.Sprintf(` + select table_schema, table_name, column_name, is_nullable, data_type, character_maximum_length, column_default + from INFORMATION_SCHEMA.COLUMNS + where table_catalog = %s + and table_schema in (%s); + `, + dialect.Literal(c.cfg.Warehouse), + strings.Join(schemas, ","), + )) + if err != nil { + return nil, err + } + defer columns.Close() + + type columnRow struct { + tableRow + ColumnName string + IsNullable string + DataType string + CharacterMaximumLength stdsql.NullInt64 + ColumnDefault stdsql.NullString + } + + for columns.Next() { + var c columnRow + if err := columns.Scan(&c.TableSchema, &c.TableName, &c.ColumnName, &c.IsNullable, &c.DataType, &c.CharacterMaximumLength, &c.ColumnDefault); err != nil { + return nil, err + } + + is.PushField(boilerplate.EndpointField{ + Name: c.ColumnName, + Nullable: strings.EqualFold(c.IsNullable, "yes"), + Type: c.DataType, + CharacterMaxLength: int(c.CharacterMaximumLength.Int64), + HasDefault: c.ColumnDefault.Valid, + }, c.TableSchema, c.TableName) + } + if err := columns.Err(); err != nil { + return nil, err + } + + return is, nil +} + +func (c *client) CreateTable(ctx context.Context, tc sql.TableCreate) error { + _, err := c.db.ExecContext(ctx, tc.TableCreateSql) + return err +} + +func (c *client) DeleteTable(ctx context.Context, path []string) (string, boilerplate.ActionApplyFn, error) { + stmt := fmt.Sprintf("DROP TABLE %s;", dialect.Identifier(path...)) + + return stmt, func(ctx context.Context) error { + _, err := c.db.ExecContext(ctx, stmt) + return err + }, nil +} + +func (c *client) AlterTable(ctx context.Context, ta sql.TableAlter) (string, boilerplate.ActionApplyFn, error) { + if len(ta.DropNotNulls) != 0 { + return "", nil, fmt.Errorf("cannot drop nullability constraints but got %d DropNotNulls for table %s", len(ta.DropNotNulls), ta.Identifier) + } + + var addColumnsQuery []string + if len(ta.AddColumns) > 0 { + var addColumnsStmt strings.Builder + if err := tplAlterTableColumns.Execute(&addColumnsStmt, ta); err != nil { + return "", nil, fmt.Errorf("rendering alter table columns statement: %w", err) + } + + addColumnsQuery = append(addColumnsQuery, addColumnsStmt.String()) + } + + var migrateQueries []string + if len(ta.ColumnTypeChanges) > 0 { + sourceTable := ta.Table.Identifier + tmpTable := dialect.Identifier(ta.InfoLocation.TableSchema, uuid.NewString()) + + params := migrateParams{ + SourceTable: sourceTable, + TmpName: tmpTable, + } + + for _, col := range ta.Columns() { + mCol := 
migrateColumn{Identifier: col.Identifier} + + if n := slices.IndexFunc(ta.ColumnTypeChanges, func(m sql.ColumnTypeMigration) bool { + return m.Identifier == col.Identifier + }); n != -1 { + m := ta.ColumnTypeChanges[n] + mCol.CastSQL = m.CastSQL(m) + } + + params.Columns = append(params.Columns, mCol) + } + + var migrateTableQuery strings.Builder + if err := tplCreateMigrationTable.Execute(&migrateTableQuery, params); err != nil { + return "", nil, fmt.Errorf("rendering create migration table statement: %w", err) + } + + migrateQueries = []string{ + migrateTableQuery.String(), + fmt.Sprintf("DROP TABLE %s;", sourceTable), + fmt.Sprintf( + "EXEC sp_rename %s, %s;", + dialect.Literal(tmpTable), + dialect.Literal(dialect.Identifier(ta.InfoLocation.TableName))), + } + } + + allQueries := append(addColumnsQuery, migrateQueries...) + + return strings.Join(allQueries, "\n"), func(ctx context.Context) error { + if len(addColumnsQuery) == 1 { // slice is either empty or has a single query + if _, err := c.db.ExecContext(ctx, addColumnsQuery[0]); err != nil { + log.WithField("query", addColumnsQuery[0]).Error("alter table query failed") + return err + } + } + + // The queries for a table migration are run in a transaction since this + // involves copying the existing table into a "temporary" table with a + // different name, dropping the original table, and renaming the + // temporary table to replace the original table. This will all be done + // as an atomic action via the transaction. + txn, err := c.db.BeginTx(ctx, nil) + if err != nil { + return fmt.Errorf("db.BeginTx: %w", err) + } + defer txn.Rollback() + + for _, query := range migrateQueries { + if _, err := txn.ExecContext(ctx, query); err != nil { + log.WithField("query", query).Error("migrate table query failed") + return err + } + } + + if err := txn.Commit(); err != nil { + return fmt.Errorf("txn.Commit: %w", err) + } + + return nil + }, nil +} + +func (c *client) ListSchemas(ctx context.Context) ([]string, error) { + rows, err := c.db.QueryContext(ctx, "select schema_name from INFORMATION_SCHEMA.SCHEMATA") + if err != nil { + return nil, fmt.Errorf("querying schemata: %w", err) + } + defer rows.Close() + + out := []string{} + + for rows.Next() { + var schema string + if err := rows.Scan(&schema); err != nil { + return nil, fmt.Errorf("scanning row: %w", err) + } + out = append(out, schema) + } + + return out, nil +} + +func (c *client) CreateSchema(ctx context.Context, schemaName string) error { + return sql.StdCreateSchema(ctx, c.db, dialect, schemaName) +} + +type badRequestResponseBody struct { + Error string `json:"error"` + ErrorDescription string `json:"error_description"` + ErrorCodes []int `json:"error_codes"` +} + +func preReqs(ctx context.Context, conf any, tenant string) *sql.PrereqErr { + errs := &sql.PrereqErr{} + + cfg := conf.(*config) + + db, err := cfg.db() + if err != nil { + errs.Err(err) + return errs + } + + pingCtx, cancel := context.WithTimeout(ctx, 20*time.Second) + defer cancel() + + var wh int + if err := db.QueryRowContext(pingCtx, fmt.Sprintf("SELECT 1 from sys.databases WHERE name = %s;", dialect.Literal(cfg.Warehouse))).Scan(&wh); err != nil { + var authErr *azidentity.AuthenticationFailedError + var netOpErr *net.OpError + + if errors.As(err, &netOpErr) { + err = fmt.Errorf("could not connect to endpoint: ensure the connection string '%s' is correct", cfg.ConnectionString) + } else if errors.Is(err, stdsql.ErrNoRows) { + err = fmt.Errorf("warehouse '%s' does not exist", cfg.Warehouse) + } else if 
errors.As(err, &authErr) { + var res badRequestResponseBody + if err := json.NewDecoder(authErr.RawResponse.Body).Decode(&res); err != nil { + panic(err) + } + + if slices.Contains(res.ErrorCodes, 700016) { + err = fmt.Errorf("invalid client ID '%s': ensure that the client ID for the correct application is configured", cfg.ClientID) + } else if slices.Contains(res.ErrorCodes, 7000215) { + err = fmt.Errorf("invalid client secret provided: ensure the secret being sent in the request is the client secret value, not the client secret ID, for a secret added to app '%s'", cfg.ClientID) + } + + log.WithField("response", res).Error("connection error") + } + + errs.Err(err) + } + + // Create, read, and delete an object per the configuration. Storage account + // keys don't have fine-grained permissions so if _anything_ works, + // everything should work. + testKey := path.Join(cfg.Directory, uuid.NewString()) + data := []byte("testing") + if storage, err := cfg.storageClient(); err != nil { + errs.Err(err) + } else if _, err := storage.UploadBuffer(ctx, cfg.ContainerName, testKey, data, nil); err != nil { + var netOpErr *net.OpError + + if errors.As(err, &netOpErr) { + err = fmt.Errorf( + "could not connect to blob storage endpoint '%s': ensure the storage account name '%s', storage account key, and container name '%s' are correct", + storage.URL(), cfg.StorageAccountName, cfg.ContainerName, + ) + } else { + err = fmt.Errorf("uploading test blob: %w", err) + } + + errs.Err(err) + } else if _, err := storage.DownloadBuffer(ctx, cfg.ContainerName, testKey, data, nil); err != nil { + errs.Err(fmt.Errorf("downloading test blob: %w", err)) + } else if _, err := storage.DeleteBlob(ctx, cfg.ContainerName, testKey, nil); err != nil { + errs.Err(fmt.Errorf("deleting test blob: %w", err)) + } + + return errs +} + +func (c *client) ExecStatements(ctx context.Context, statements []string) error { + return sql.StdSQLExecStatements(ctx, c.db, statements) +} + +func (c *client) InstallFence(ctx context.Context, checkpoints sql.Table, fence sql.Fence) (sql.Fence, error) { + var txn, err = c.db.BeginTx(ctx, nil) + if err != nil { + return sql.Fence{}, fmt.Errorf("db.BeginTx: %w", err) + } + defer func() { + if txn != nil { + _ = txn.Rollback() + } + }() + + // Increment the fence value of _any_ checkpoint which overlaps our key range. + if _, err = txn.Exec( + fmt.Sprintf(` + UPDATE %s + SET fence=fence+1 + WHERE materialization=%s + AND key_end>=%s + AND key_begin<=%s + ; + `, + checkpoints.Identifier, + checkpoints.Keys[0].Placeholder, + checkpoints.Keys[1].Placeholder, + checkpoints.Keys[2].Placeholder, + ), + fence.Materialization, + fence.KeyBegin, + fence.KeyEnd, + ); err != nil { + return sql.Fence{}, fmt.Errorf("incrementing fence: %w", err) + } + + // Read the checkpoint with the narrowest [key_begin, key_end] which fully overlaps our range. + var readBegin, readEnd uint32 + var checkpoint string + + if err = txn.QueryRow( + fmt.Sprintf(` + SELECT TOP 1 fence, key_begin, key_end, "checkpoint" + FROM %s + WHERE materialization=%s + AND key_begin<=%s + AND key_end>=%s + ORDER BY key_end - key_begin ASC + ; + `, + checkpoints.Identifier, + checkpoints.Keys[0].Placeholder, + checkpoints.Keys[1].Placeholder, + checkpoints.Keys[2].Placeholder, + ), + fence.Materialization, + fence.KeyBegin, + fence.KeyEnd, + ).Scan(&fence.Fence, &readBegin, &readEnd, &checkpoint); err == stdsql.ErrNoRows { + // Set an invalid range, which compares as unequal to trigger an insertion below. 
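+ // A real checkpoint row always has key_begin <= key_end, so when no row
+ // was found the (1, 0) sentinel cannot equal any fence range and the
+ // INSERT below runs.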
+ readBegin, readEnd = 1, 0 + } else if err != nil { + return sql.Fence{}, fmt.Errorf("scanning fence and checkpoint: %w", err) + } else if fence.Checkpoint, err = base64.StdEncoding.DecodeString(checkpoint); err != nil { + return sql.Fence{}, fmt.Errorf("base64.Decode(checkpoint): %w", err) + } + + // If a checkpoint for this exact range doesn't exist then insert it now. + if readBegin == fence.KeyBegin && readEnd == fence.KeyEnd { + // Exists; no-op. + } else if _, err = txn.Exec( + fmt.Sprintf( + `INSERT INTO %s (materialization, key_begin, key_end, fence, "checkpoint") VALUES (%s, %s, %s, %s, %s);`, + checkpoints.Identifier, + checkpoints.Keys[0].Placeholder, + checkpoints.Keys[1].Placeholder, + checkpoints.Keys[2].Placeholder, + checkpoints.Values[0].Placeholder, + checkpoints.Values[1].Placeholder, + ), + fence.Materialization, + fence.KeyBegin, + fence.KeyEnd, + fence.Fence, + base64.StdEncoding.EncodeToString(fence.Checkpoint), + ); err != nil { + return sql.Fence{}, fmt.Errorf("inserting fence: %w", err) + } + + err = txn.Commit() + txn = nil // Disable deferred rollback. + + if err != nil { + return sql.Fence{}, fmt.Errorf("txn.Commit: %w", err) + } + return fence, nil +} + +func (c *client) Close() { + c.db.Close() +} diff --git a/materialize-azure-fabric-warehouse/driver.go b/materialize-azure-fabric-warehouse/driver.go new file mode 100644 index 0000000000..e3efd9334b --- /dev/null +++ b/materialize-azure-fabric-warehouse/driver.go @@ -0,0 +1,156 @@ +package main + +import ( + "context" + stdsql "database/sql" + "encoding/base64" + "encoding/json" + "fmt" + "strings" + + "github.com/Azure/azure-sdk-for-go/sdk/storage/azblob" + "github.com/estuary/connectors/go/dbt" + boilerplate "github.com/estuary/connectors/materialize-boilerplate" + sql "github.com/estuary/connectors/materialize-sql" + pf "github.com/estuary/flow/go/protocols/flow" + "github.com/microsoft/go-mssqldb/azuread" +) + +type config struct { + ClientID string `json:"clientID" jsonschema:"title=Client ID,description=Client ID for the service principal used to connect to the Azure Fabric Warehouse." jsonschema_extras:"order=0"` + ClientSecret string `json:"clientSecret" jsonschema:"title=Client Secret,description=Client Secret for the service principal used to connect to the Azure Fabric Warehouse." jsonschema_extras:"order=1,secret=true"` + Warehouse string `json:"warehouse" jsonschema:"title=Warehouse,description=Name of the Azure Fabric Warehouse to connect to." jsonschema_extras:"order=2"` + Schema string `json:"schema" jsonschema:"title=Schema,description=Schema for bound collection tables (unless overridden within the binding resource configuration) as well as associated materialization metadata tables." jsonschema_extras:"order=3"` + ConnectionString string `json:"connectionString" jsonschema:"title=Connection String,description=SQL connection string for the Azure Fabric Warehouse." jsonschema_extras:"order=4"` + StorageAccountName string `json:"storageAccountName" jsonschema:"title=Storage Account Name,description=Name of the storage account that temporary files will be written to." jsonschema_extras:"order=5"` + StorageAccountKey string `json:"storageAccountKey" jsonschema:"title=Storage Account Key,description=Storage account key for the storage account that temporary files will be written to." 
jsonschema_extras:"order=6,secret=true"` + ContainerName string `json:"containerName" jsonschema:"title=Storage Account Container Name,description=Name of the container in the storage account where temporary files will be written." jsonschema_extras:"order=7"` + Directory string `json:"directory,omitempty" jsonschema:"title=Directory,description=Optional prefix that will be used for temporary files." jsonschema_extras:"order=8"` + HardDelete bool `json:"hardDelete,omitempty" jsonschema:"title=Hard Delete,description=If this option is enabled items deleted in the source will also be deleted from the destination. By default is disabled and _meta/op in the destination will signify whether rows have been deleted (soft-delete).,default=false" jsonschema_extras:"order=9"` + Schedule boilerplate.ScheduleConfig `json:"syncSchedule,omitempty" jsonschema:"title=Sync Schedule,description=Configure schedule of transactions for the materialization."` + DBTJobTrigger dbt.JobConfig `json:"dbt_job_trigger,omitempty" jsonschema:"title=dbt Cloud Job Trigger,description=Trigger a dbt job when new data is available"` +} + +func (c *config) Validate() error { + var requiredProperties = [][]string{ + {"clientID", c.ClientID}, + {"clientSecret", c.ClientSecret}, + {"warehouse", c.Warehouse}, + {"schema", c.Schema}, + {"connectionString", c.ConnectionString}, + {"storageAccountName", c.StorageAccountName}, + {"storageAccountKey", c.StorageAccountKey}, + {"containerName", c.ContainerName}, + } + for _, req := range requiredProperties { + if req[1] == "" { + return fmt.Errorf("missing '%s'", req[0]) + } + } + + if c.Directory != "" { + if strings.HasPrefix(c.Directory, "/") { + return fmt.Errorf("directory %q cannot start with /", c.Directory) + } + } + + if err := c.Schedule.Validate(); err != nil { + return err + } else if err := c.DBTJobTrigger.Validate(); err != nil { + return err + } + + return nil +} + +func (c *config) db() (*stdsql.DB, error) { + db, err := stdsql.Open( + azuread.DriverName, + fmt.Sprintf( + "server=%s;user id=%s;password=%s;port=%d;database=%s;fedauth=ActiveDirectoryServicePrincipal", + c.ConnectionString, c.ClientID, c.ClientSecret, 1433, c.Warehouse, + )) + if err != nil { + return nil, err + } + + return db, nil +} + +func (c *config) storageClient() (*azblob.Client, error) { + if _, err := base64.StdEncoding.DecodeString(c.StorageAccountKey); err != nil { + return nil, fmt.Errorf("invalid storage account key: must be base64-encoded") + } + + cred, err := azblob.NewSharedKeyCredential(c.StorageAccountName, c.StorageAccountKey) + if err != nil { + return nil, fmt.Errorf("failed to create storage client credential: %w", err) + } + + return azblob.NewClientWithSharedKeyCredential( + fmt.Sprintf("https://%s.blob.core.windows.net/", c.StorageAccountName), + cred, + nil, + ) +} + +type tableConfig struct { + Table string `json:"table" jsonschema:"title=Table,description=Name of the database table." jsonschema_extras:"x-collection-name=true"` + Schema string `json:"schema,omitempty" jsonschema:"title=Alternative Schema,description=Alternative schema for this table (optional)." jsonschema_extras:"x-schema-name=true"` + Delta bool `json:"delta_updates,omitempty" jsonschema:"title=Delta Update,description=Should updates to this table be done via delta updates." jsonschema_extras:"x-delta-updates=true"` + + warehouse string +} + +func newTableConfig(ep *sql.Endpoint) sql.Resource { + return &tableConfig{ + // Default to the endpoint schema. 
This will be over-written by a present `schema` property + // within `raw`. + Schema: ep.Config.(*config).Schema, + warehouse: ep.Config.(*config).Warehouse, + } +} + +func (r tableConfig) Validate() error { + if r.Table == "" { + return fmt.Errorf("missing table") + } + + return nil +} + +func (c tableConfig) Path() sql.TablePath { + return []string{c.warehouse, c.Schema, c.Table} +} + +func (c tableConfig) DeltaUpdates() bool { + return c.Delta +} + +func newDriver() *sql.Driver { + return &sql.Driver{ + DocumentationURL: "https://go.estuary.dev/materialize-azure-fabric-warehouse", + EndpointSpecType: new(config), + ResourceSpecType: new(tableConfig), + StartTunnel: func(ctx context.Context, conf any) error { return nil }, + NewEndpoint: func(ctx context.Context, raw json.RawMessage, tenant string) (*sql.Endpoint, error) { + var cfg = new(config) + if err := pf.UnmarshalStrict(raw, cfg); err != nil { + return nil, fmt.Errorf("could not parse endpoint configuration: %w", err) + } + + return &sql.Endpoint{ + Config: cfg, + Dialect: dialect, + MetaCheckpoints: sql.FlowCheckpointsTable([]string{cfg.Warehouse, cfg.Schema}), + NewClient: newClient, + CreateTableTemplate: tplCreateTargetTable, + NewResource: newTableConfig, + NewTransactor: newTransactor, + Tenant: tenant, + ConcurrentApply: true, + }, nil + }, + PreReqs: preReqs, + } +} diff --git a/materialize-azure-fabric-warehouse/driver_test.go b/materialize-azure-fabric-warehouse/driver_test.go new file mode 100644 index 0000000000..381ece9a56 --- /dev/null +++ b/materialize-azure-fabric-warehouse/driver_test.go @@ -0,0 +1,250 @@ +package main + +import ( + "context" + stdsql "database/sql" + "encoding/json" + "fmt" + "os" + "slices" + "strings" + "testing" + + "github.com/bradleyjkemp/cupaloy" + boilerplate "github.com/estuary/connectors/materialize-boilerplate" + sql "github.com/estuary/connectors/materialize-sql" + pm "github.com/estuary/flow/go/protocols/materialize" + "github.com/stretchr/testify/require" +) + +func mustGetCfg(t *testing.T) config { + if os.Getenv("TEST_DATABASE") != "yes" { + t.Skipf("skipping %q: ${TEST_DATABASE} != \"yes\"", t.Name()) + return config{} + } + + out := config{} + + for _, prop := range []struct { + key string + dest *string + }{ + {"FABRIC_WAREHOUSE_CLIENT_ID", &out.ClientID}, + {"FABRIC_WAREHOUSE_CLIENT_SECRET", &out.ClientSecret}, + {"FABRIC_WAREHOUSE_WAREHOUSE", &out.Warehouse}, + {"FABRIC_WAREHOUSE_SCHEMA", &out.Schema}, + {"FABRIC_WAREHOUSE_CONNECTION_STRING", &out.ConnectionString}, + {"FABRIC_WAREHOUSE_STORAGE_ACCOUNT_NAME", &out.StorageAccountName}, + {"FABRIC_WAREHOUSE_STORAGE_ACCOUNT_KEY", &out.StorageAccountKey}, + {"FABRIC_WAREHOUSE_CONTAINER_NAME", &out.ContainerName}, + } { + *prop.dest = os.Getenv(prop.key) + } + + require.NoError(t, out.Validate()) + + return out +} + +func TestValidateAndApply(t *testing.T) { + ctx := context.Background() + + cfg := mustGetCfg(t) + + resourceConfig := tableConfig{ + Table: "target", + Schema: cfg.Schema, + Delta: true, + warehouse: cfg.Warehouse, + } + + boilerplate.RunValidateAndApplyTestCases( + t, + newDriver(), + cfg, + resourceConfig, + func(t *testing.T) string { + t.Helper() + + db, err := cfg.db() + require.NoError(t, err) + defer db.Close() + + sch, err := getSchema(ctx, db, cfg.Warehouse, resourceConfig.Schema, resourceConfig.Table) + require.NoError(t, err) + + return sch + }, + func(t *testing.T) { + t.Helper() + + db, err := cfg.db() + require.NoError(t, err) + defer db.Close() + + _, _ = db.ExecContext(ctx, fmt.Sprintf("drop table 
%s;", dialect.Identifier(cfg.Warehouse, resourceConfig.Schema, resourceConfig.Table))) + }, + ) +} + +func TestValidateAndApplyMigrations(t *testing.T) { + ctx := context.Background() + + cfg := mustGetCfg(t) + + resourceConfig := tableConfig{ + Table: "target", + Schema: cfg.Schema, + warehouse: cfg.Warehouse, + } + + db, err := cfg.db() + require.NoError(t, err) + defer db.Close() + + sql.RunValidateAndApplyMigrationsTests( + t, + newDriver(), + cfg, + resourceConfig, + func(t *testing.T) string { + t.Helper() + + sch, err := getSchema(ctx, db, cfg.Warehouse, resourceConfig.Schema, resourceConfig.Table) + require.NoError(t, err) + + return sch + }, + func(t *testing.T, cols []string, values []string) { + t.Helper() + + var keys = make([]string, len(cols)) + for i, col := range cols { + keys[i] = dialect.Identifier(col) + } + for i := range values { + if values[i] == "true" { + values[i] = "1" + } else if values[i] == "false" { + values[i] = "0" + } + } + + keys = append(keys, dialect.Identifier("_meta/flow_truncated")) + values = append(values, "0") + keys = append(keys, dialect.Identifier("flow_published_at")) + values = append(values, "'2024-09-13 01:01:01'") + keys = append(keys, dialect.Identifier("flow_document")) + values = append(values, "'{}'") + q := fmt.Sprintf("insert into %s (%s) VALUES (%s);", dialect.Identifier(resourceConfig.Schema, resourceConfig.Table), strings.Join(keys, ","), strings.Join(values, ",")) + _, err = db.ExecContext(ctx, q) + + require.NoError(t, err) + }, + func(t *testing.T) string { + t.Helper() + + rows, err := sql.DumpTestTable(t, db, dialect.Identifier(resourceConfig.Schema, resourceConfig.Table)) + + require.NoError(t, err) + + return rows + }, + func(t *testing.T) { + t.Helper() + _, _ = db.ExecContext(ctx, fmt.Sprintf("drop table %s;", dialect.Identifier(resourceConfig.Schema, resourceConfig.Table))) + }, + ) +} + +func TestFencingCases(t *testing.T) { + var ctx = context.Background() + + var cfg = mustGetCfg(t) + + c, err := newClient(ctx, &sql.Endpoint{Config: &cfg}) + require.NoError(t, err) + defer c.Close() + + sql.RunFenceTestCases(t, + c, + []string{cfg.Warehouse, cfg.Schema, "temp_test_fencing_checkpoints"}, + dialect, + tplCreateTargetTable, + func(table sql.Table, fence sql.Fence) error { + var fenceUpdate strings.Builder + if err := tplUpdateFence.Execute(&fenceUpdate, fence); err != nil { + return fmt.Errorf("evaluating fence template: %w", err) + } + return c.ExecStatements(ctx, []string{fenceUpdate.String()}) + }, + func(table sql.Table) (out string, err error) { + out, err = sql.StdDumpTable(ctx, c.(*client).db, table) + return strings.Replace(out, "\"checkpoint\"", "checkpoint", 1), err + }, + ) +} + +func TestSpecification(t *testing.T) { + var resp, err = newDriver(). 
+ Spec(context.Background(), &pm.Request_Spec{}) + require.NoError(t, err) + + formatted, err := json.MarshalIndent(resp, "", " ") + require.NoError(t, err) + + cupaloy.SnapshotT(t, formatted) +} + +func getSchema(ctx context.Context, db *stdsql.DB, warehouse, schema, table string) (string, error) { + q := fmt.Sprintf(` + select column_name, is_nullable, data_type + from INFORMATION_SCHEMA.COLUMNS + where + table_catalog = '%s' + and table_schema = '%s' + and table_name = '%s'; +`, + warehouse, + schema, + table, + ) + + rows, err := db.QueryContext(ctx, q) + if err != nil { + return "", err + } + defer rows.Close() + + type foundColumn struct { + Name string + Nullable string // string "YES" or "NO" + Type string + } + + cols := []foundColumn{} + for rows.Next() { + var c foundColumn + if err := rows.Scan(&c.Name, &c.Nullable, &c.Type); err != nil { + return "", err + } + cols = append(cols, c) + } + if err := rows.Err(); err != nil { + return "", err + } + + slices.SortFunc(cols, func(a, b foundColumn) int { + return strings.Compare(a.Name, b.Name) + }) + + var out strings.Builder + enc := json.NewEncoder(&out) + for _, c := range cols { + if err := enc.Encode(c); err != nil { + return "", err + } + } + + return out.String(), nil +} diff --git a/materialize-azure-fabric-warehouse/main.go b/materialize-azure-fabric-warehouse/main.go new file mode 100644 index 0000000000..908a87692e --- /dev/null +++ b/materialize-azure-fabric-warehouse/main.go @@ -0,0 +1,7 @@ +package main + +import boilerplate "github.com/estuary/connectors/materialize-boilerplate" + +func main() { + boilerplate.RunMain(newDriver()) +} diff --git a/materialize-azure-fabric-warehouse/reserved_words.go b/materialize-azure-fabric-warehouse/reserved_words.go new file mode 100644 index 0000000000..39787bf6c1 --- /dev/null +++ b/materialize-azure-fabric-warehouse/reserved_words.go @@ -0,0 +1,489 @@ +package main + +// https://learn.microsoft.com/en-us/sql/t-sql/language-elements/reserved-keywords-transact-sql?view=sql-server-2017 +var SQLSERVER_RESERVED_WORDS = []string{ +"absolute", +"action", +"ada", +"add", +"admin", +"after", +"aggregate", +"alias", +"all", +"allocate", +"alter", +"and", +"any", +"are", +"array", +"as", +"asc", +"asensitive", +"assertion", +"asymmetric", +"at", +"atomic", +"authorization", +"avg", +"backup", +"before", +"begin", +"between", +"binary", +"bit", +"bit_length", +"blob", +"boolean", +"both", +"breadth", +"break", +"browse", +"bulk", +"by", +"call", +"called", +"cardinality", +"cascade", +"cascaded", +"case", +"cast", +"catalog", +"char", +"char_length", +"character", +"character_length", +"check", +"checkpoint", +"class", +"clob", +"close", +"clustered", +"coalesce", +"collate", +"collation", +"collect", +"column", +"commit", +"completion", +"compute", +"condition", +"connect", +"connection", +"constraint", +"constraints", +"constructor", +"contains", +"containstable", +"continue", +"convert", +"corr", +"corresponding", +"count", +"covar_pop", +"covar_samp", +"create", +"cross", +"cube", +"cume_dist", +"current", +"current_catalog", +"current_date", +"current_default_transform_group", +"current_path", +"current_role", +"current_schema", +"current_time", +"current_timestamp", +"current_transform_group_for_type", +"current_user", +"cursor", +"cycle", +"data", +"database", +"date", +"day", +"dbcc", +"deallocate", +"dec", +"decimal", +"declare", +"default", +"deferrable", +"deferred", +"delete", +"deny", +"depth", +"deref", +"desc", +"describe", +"descriptor", +"destroy", +"destructor", 
+"deterministic", +"diagnostics", +"dictionary", +"disconnect", +"disk", +"distinct", +"distributed", +"domain", +"double", +"drop", +"dump", +"dynamic", +"each", +"element", +"else", +"end", +"end-exec", +"equals", +"errlvl", +"escape", +"every", +"except", +"exception", +"exec", +"execute", +"exists", +"exit", +"external", +"extract", +"false", +"fetch", +"file", +"fillfactor", +"filter", +"first", +"float", +"for", +"foreign", +"fortran", +"found", +"free", +"freetext", +"freetexttable", +"from", +"full", +"fulltexttable", +"function", +"fusion", +"general", +"get", +"global", +"go", +"goto", +"grant", +"group", +"grouping", +"having", +"hold", +"holdlock", +"host", +"hour", +"identity", +"identity_insert", +"identitycol", +"if", +"ignore", +"immediate", +"in", +"include", +"index", +"indicator", +"initialize", +"initially", +"inner", +"inout", +"input", +"insensitive", +"insert", +"int", +"integer", +"intersect", +"intersection", +"interval", +"into", +"is", +"isolation", +"iterate", +"join", +"key", +"kill", +"language", +"large", +"last", +"lateral", +"leading", +"left", +"less", +"level", +"like", +"like_regex", +"limit", +"lineno", +"ln", +"load", +"local", +"localtime", +"localtimestamp", +"locator", +"lower", +"map", +"match", +"max", +"member", +"merge", +"method", +"min", +"minute", +"mod", +"modifies", +"modify", +"module", +"month", +"multiset", +"names", +"national", +"natural", +"nchar", +"nclob", +"new", +"next", +"no", +"nocheck", +"nonclustered", +"none", +"normalize", +"not", +"null", +"nullif", +"numeric", +"object", +"occurrences_regex", +"octet_length", +"of", +"off", +"offsets", +"old", +"on", +"only", +"open", +"opendatasource", +"openquery", +"openrowset", +"openxml", +"operation", +"option", +"or", +"order", +"ordinality", +"out", +"outer", +"output", +"over", +"overlaps", +"overlay", +"pad", +"parameter", +"parameters", +"partial", +"partition", +"pascal", +"path", +"percent", +"percent_rank", +"percentile_cont", +"percentile_disc", +"pivot", +"plan", +"position", +"position_regex", +"postfix", +"precision", +"prefix", +"preorder", +"prepare", +"preserve", +"primary", +"print", +"prior", +"privileges", +"proc", +"procedure", +"public", +"raiserror", +"range", +"read", +"reads", +"readtext", +"real", +"reconfigure", +"recursive", +"ref", +"references", +"referencing", +"regr_avgx", +"regr_avgy", +"regr_count", +"regr_intercept", +"regr_r2", +"regr_slope", +"regr_sxx", +"regr_sxy", +"regr_syy", +"relative", +"release", +"replication", +"restore", +"restrict", +"result", +"return", +"returns", +"revert", +"revoke", +"right", +"role", +"rollback", +"rollup", +"routine", +"row", +"rowcount", +"rowguidcol", +"rows", +"rule", +"save", +"savepoint", +"schema", +"scope", +"scroll", +"search", +"second", +"section", +"securityaudit", +"select", +"semantickeyphrasetable", +"semanticsimilaritydetailstable", +"semanticsimilaritytable", +"sensitive", +"sequence", +"session", +"session_user", +"set", +"sets", +"setuser", +"shutdown", +"similar", +"size", +"smallint", +"some", +"space", +"specific", +"specifictype", +"sql", +"sqlca", +"sqlcode", +"sqlerror", +"sqlexception", +"sqlstate", +"sqlwarning", +"start", +"state", +"statement", +"static", +"statistics", +"stddev_pop", +"stddev_samp", +"structure", +"submultiset", +"substring", +"substring_regex", +"sum", +"symmetric", +"system", +"system_user", +"table", +"tablesample", +"temporary", +"terminate", +"textsize", +"than", +"then", +"time", +"timestamp", +"timezone_hour", +"timezone_minute", +"to", +"top", +"trailing", 
+"tran", +"transaction", +"translate", +"translate_regex", +"translation", +"treat", +"trigger", +"trim", +"true", +"truncate", +"try_convert", +"tsequal", +"uescape", +"under", +"union", +"unique", +"unknown", +"unnest", +"unpivot", +"update", +"updatetext", +"upper", +"usage", +"use", +"user", +"using", +"value", +"values", +"var_pop", +"var_samp", +"varchar", +"variable", +"varying", +"view", +"waitfor", +"when", +"whenever", +"where", +"while", +"width_bucket", +"window", +"with", +"within group", +"within", +"without", +"work", +"write", +"writetext", +"xmlagg", +"xmlattributes", +"xmlbinary", +"xmlcast", +"xmlcomment", +"xmlconcat", +"xmldocument", +"xmlelement", +"xmlexists", +"xmlforest", +"xmliterate", +"xmlnamespaces", +"xmlparse", +"xmlpi", +"xmlquery", +"xmlserialize", +"xmltable", +"xmltext", +"xmlvalidate", +"year", +"zone", +} + diff --git a/materialize-azure-fabric-warehouse/sqlgen.go b/materialize-azure-fabric-warehouse/sqlgen.go new file mode 100644 index 0000000000..4afd0e24ed --- /dev/null +++ b/materialize-azure-fabric-warehouse/sqlgen.go @@ -0,0 +1,274 @@ +package main + +import ( + "fmt" + "slices" + "strings" + + sql "github.com/estuary/connectors/materialize-sql" +) + +var dialect = func() sql.Dialect { + mapper := sql.NewDDLMapper( + sql.FlatTypeMappings{ + sql.INTEGER: sql.MapSignedInt64( + sql.MapStatic("BIGINT"), + sql.MapStatic("DECIMAL(38,0)", sql.AlsoCompatibleWith("DECIMAL")), + ), + sql.NUMBER: sql.MapStatic("FLOAT"), + sql.BOOLEAN: sql.MapStatic("BIT"), + sql.OBJECT: sql.MapStatic("VARCHAR(MAX)", sql.AlsoCompatibleWith("VARCHAR"), sql.UsingConverter(sql.ToJsonString)), + sql.ARRAY: sql.MapStatic("VARCHAR(MAX)", sql.AlsoCompatibleWith("VARCHAR"), sql.UsingConverter(sql.ToJsonString)), + sql.BINARY: sql.MapStatic("VARBINARY(MAX)", sql.AlsoCompatibleWith("VARBINARY")), + sql.MULTIPLE: sql.MapStatic("VARCHAR(MAX)", sql.AlsoCompatibleWith("VARCHAR"), sql.UsingConverter(sql.ToJsonString)), + sql.STRING_INTEGER: sql.MapStringMaxLen( + sql.MapStatic("DECIMAL(38,0)", sql.AlsoCompatibleWith("DECIMAL"), sql.UsingConverter(sql.StrToInt)), + sql.MapStatic("VARCHAR(MAX)", sql.AlsoCompatibleWith("VARCHAR"), sql.UsingConverter(sql.ToStr)), + // A 96-bit integer is 39 characters long, but not all 39 digit + // integers will fit in one. + 38, + ), + sql.STRING_NUMBER: sql.MapStatic("FLOAT", sql.UsingConverter(sql.StrToFloat(nil, nil, nil))), + sql.STRING: sql.MapString(sql.StringMappings{ + Fallback: sql.MapStatic("VARCHAR(MAX)", sql.AlsoCompatibleWith("VARCHAR")), + WithFormat: map[string]sql.MapProjectionFn{ + "date": sql.MapStatic("DATE", sql.UsingConverter(sql.ClampDate)), + "date-time": sql.MapStatic("DATETIME2(6)", sql.AlsoCompatibleWith("DATETIME2"), sql.UsingConverter(sql.ClampDatetime)), + "time": sql.MapStatic("TIME(6)", sql.AlsoCompatibleWith("TIME")), + }, + }), + }, + // NB: We are not using NOT NULL text so that all columns are created as + // nullable. This is necessary because Fabric Warehouse does not support + // dropping a NOT NULL constraint, so we need to create columns as + // nullable to preserve the ability to change collection schema fields + // from required to not required or add/remove fields from the + // materialization. 
+ ) + + return sql.Dialect{ + MigratableTypes: sql.MigrationSpecs{ + "bigint": {sql.NewMigrationSpec([]string{"DECIMAL(38,0)", "VARCHAR(MAX)"})}, + "decimal": {sql.NewMigrationSpec([]string{"VARCHAR(MAX)"})}, + "float": {sql.NewMigrationSpec([]string{"VARCHAR(MAX)"})}, + "bit": {sql.NewMigrationSpec([]string{"VARCHAR(MAX)"}, sql.WithCastSQL(bitToStringCast))}, + "date": {sql.NewMigrationSpec([]string{"VARCHAR(MAX)"})}, + "datetime2": {sql.NewMigrationSpec([]string{"VARCHAR(MAX)"})}, + "time": {sql.NewMigrationSpec([]string{"VARCHAR(MAX)"})}, + }, + TableLocatorer: sql.TableLocatorFn(func(path []string) sql.InfoTableLocation { + return sql.InfoTableLocation{TableSchema: path[1], TableName: path[2]} + }), + ColumnLocatorer: sql.ColumnLocatorFn(func(field string) string { return field }), + Identifierer: sql.IdentifierFn(sql.JoinTransform(".", + sql.PassThroughTransform( + func(s string) bool { + return sql.IsSimpleIdentifier(s) && !slices.Contains(SQLSERVER_RESERVED_WORDS, strings.ToLower(s)) + }, + sql.QuoteTransform(`"`, `""`), + ))), + Literaler: sql.ToLiteralFn(sql.QuoteTransform("'", "''")), + Placeholderer: sql.PlaceholderFn(func(index int) string { + // parameterIndex starts at 0, but sqlserver parameters start at @p1 + return fmt.Sprintf("@p%d", index+1) + }), + TypeMapper: mapper, + MaxColumnCharLength: 0, + CaseInsensitiveColumns: true, + } +}() + +func bitToStringCast(m sql.ColumnTypeMigration) string { + return fmt.Sprintf( + `CAST(CASE WHEN %s = 1 THEN 'true' WHEN %s = 0 THEN 'false' ELSE NULL END AS %s)`, + m.Identifier, m.Identifier, m.NullableDDL, + ) +} + +type queryParams struct { + sql.Table + URIs []string + StorageAccountKey string + Bounds []sql.MergeBound +} + +type migrateParams struct { + SourceTable string + TmpName string + Columns []migrateColumn +} + +type migrateColumn struct { + Identifier string + CastSQL string +} + +var ( + tplAll = sql.MustParseTemplate(dialect, "root", ` +{{ define "temp_name_load" -}} +flow_temp_table_load_{{ $.Binding }} +{{- end }} + +{{ define "temp_name_store" -}} +flow_temp_table_store_{{ $.Binding }} +{{- end }} + +{{ define "maybe_unbase64" -}} +{{- if eq $.DDL "VARBINARY(MAX)" -}}BASE64_DECODE({{$.Identifier}}){{ else }}{{$.Identifier}}{{ end }} +{{- end }} + +{{ define "maybe_unbase64_lhs" -}} +{{- if eq $.DDL "VARBINARY(MAX)" -}}BASE64_DECODE(l.{{$.Identifier}}){{ else }}l.{{$.Identifier}}{{ end }} +{{- end }} + +{{ define "createTargetTable" }} +CREATE TABLE {{$.Identifier}} ( +{{- range $ind, $col := $.Columns }} + {{- if $ind }},{{ end }} + {{$col.Identifier}} {{$col.DDL}} +{{- end }} +); +{{ end }} + +{{ define "alterTableColumns" }} +ALTER TABLE {{$.Identifier}} ADD +{{- range $ind, $col := $.AddColumns }} + {{- if $ind }},{{ end }} + {{$col.Identifier}} {{$col.NullableDDL}} +{{- end }}; +{{ end }} + +{{ define "createMigrationTable" }} +CREATE TABLE {{$.TmpName}} AS SELECT +{{- range $ind, $col := $.Columns }} + {{- if $ind }},{{ end }} + {{ if $col.CastSQL -}} {{ $col.CastSQL }} AS {{$col.Identifier}} {{- else -}} {{$col.Identifier}} {{- end }} +{{- end }} + FROM {{$.SourceTable}}; +{{ end }} + +{{ define "createLoadTable" }} +CREATE TABLE {{ template "temp_name_load" $ }} ( +{{- range $ind, $key := $.Keys }} + {{- if $ind }},{{ end }} + {{$key.Identifier}} {{- if eq $key.DDL "VARBINARY(MAX)" }} VARCHAR(MAX) {{- else }} {{$key.DDL}} {{- end }} +{{- end }} +); + +COPY INTO {{ template "temp_name_load" $ }} +({{- range $ind, $key := $.Keys }}{{- if $ind }}, {{ end }}{{$key.Identifier}}{{- end }}) +FROM {{ range $ind, $uri 
:= $.URIs }}{{- if $ind }}, {{ end }}'{{$uri}}'{{- end }} +WITH ( + FILE_TYPE = 'CSV', + COMPRESSION = 'Gzip', + CREDENTIAL = (IDENTITY='Storage Account Key', SECRET='{{ $.StorageAccountKey }}') +); +{{ end }} + +{{ define "loadQuery" }} +SELECT {{ $.Binding }}, r.{{$.Document.Identifier}} +FROM {{ template "temp_name_load" . }} AS l +JOIN {{ $.Identifier}} AS r +{{- range $ind, $bound := $.Bounds }} + {{ if $ind }} AND {{ else }} ON {{ end -}} + {{ template "maybe_unbase64_lhs" $bound }} = r.{{ $bound.Identifier }} + {{- if $bound.LiteralLower }} AND r.{{ $bound.Identifier }} >= {{ $bound.LiteralLower }} AND r.{{ $bound.Identifier }} <= {{ $bound.LiteralUpper }}{{ end }} +{{- end }} +{{ end }} + +{{ define "dropLoadTable" }} +DROP TABLE {{ template "temp_name_load" $ }}; +{{- end }} + +{{ define "create_store_staging_table" -}} +CREATE TABLE {{ template "temp_name_store" $ }} ( +{{- range $ind, $col := $.Columns }} + {{- if $ind }},{{ end }} + {{$col.Identifier}} {{- if eq $col.DDL "VARBINARY(MAX)" }} VARCHAR(MAX) {{- else }} {{$col.DDL}} {{- end }} +{{- end }} +); + +COPY INTO {{ template "temp_name_store" $ }} +({{- range $ind, $col := $.Columns }}{{- if $ind }}, {{ end }}{{$col.Identifier}}{{- end }}) +FROM {{ range $ind, $uri := $.URIs }}{{- if $ind }}, {{ end }}'{{$uri}}'{{- end }} +WITH ( + FILE_TYPE = 'CSV', + COMPRESSION = 'Gzip', + CREDENTIAL = (IDENTITY='Storage Account Key', SECRET='{{ $.StorageAccountKey }}') +); +{{- end }} + +-- Azure Fabric Warehouse doesn't yet support an actual "merge" query, +-- so the best we can do is a delete followed by an insert. A true +-- merge query may eventually be supported and we should switch to using +-- that when it is. + +{{ define "storeMergeQuery" }} +{{ template "create_store_staging_table" $ }} + +DELETE r +FROM {{$.Identifier}} AS r +INNER JOIN {{ template "temp_name_store" $ }} AS l +{{- range $ind, $bound := $.Bounds }} + {{ if $ind }} AND {{ else }} ON {{ end -}} + {{ template "maybe_unbase64_lhs" $bound }} = r.{{ $bound.Identifier }} + {{- if $bound.LiteralLower }} AND r.{{ $bound.Identifier }} >= {{ $bound.LiteralLower }} AND r.{{ $bound.Identifier }} <= {{ $bound.LiteralUpper }}{{ end }} +{{- end }}; + +INSERT INTO {{$.Identifier}} ({{- range $ind, $col := $.Columns }}{{- if $ind }}, {{ end }}{{$col.Identifier}}{{- end }}) +SELECT {{ range $ind, $col := $.Columns }}{{- if $ind }}, {{ end }}{{ template "maybe_unbase64" $col }}{{- end }} +FROM {{ template "temp_name_store" $ }} +WHERE {{$.Document.Identifier}} <> '"delete"'; + +DROP TABLE {{ template "temp_name_store" $ }}; +{{ end }} + +-- storeCopyIntoFromStagedQuery is used when there is no data to +-- merge, but there are binary columns that must be converted from +-- the staged CSV data, which is base64 encoded. + +{{ define "storeCopyIntoFromStagedQuery" }} +{{ template "create_store_staging_table" $ }} + +INSERT INTO {{$.Identifier}} ({{- range $ind, $col := $.Columns }}{{- if $ind }}, {{ end }}{{$col.Identifier}}{{ end }}) +SELECT {{ range $ind, $col := $.Columns }}{{- if $ind }}, {{ end }}{{ template "maybe_unbase64" $col }}{{- end }} +FROM {{ template "temp_name_store" $ }}; + +DROP TABLE {{ template "temp_name_store" $ }}; +{{ end }} + +-- storeCopyIntoDirectQuery is used when there is no data to +-- merge and none of the columns are binary. In this case the +-- data can be loaded directly into the target table. 
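+--
+-- In summary, the store phase picks one of three paths per binding:
+-- storeMergeQuery when previously-stored documents may need to be
+-- replaced, storeCopyIntoFromStagedQuery when all documents are new but
+-- binary columns must be decoded from base64, and storeCopyIntoDirectQuery
+-- when all documents are new and nothing needs decoding.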
+ +{{ define "storeCopyIntoDirectQuery" }} +COPY INTO {{$.Identifier}} +({{- range $ind, $col := $.Columns }}{{- if $ind }}, {{ end }}{{$col.Identifier}}{{- end }}) +FROM {{ range $ind, $uri := $.URIs }}{{- if $ind }}, {{ end }}'{{$uri}}'{{- end }} +WITH ( + FILE_TYPE = 'CSV', + COMPRESSION = 'Gzip', + CREDENTIAL = (IDENTITY='Storage Account Key', SECRET='{{ $.StorageAccountKey }}') +); +{{ end }} + +{{ define "updateFence" }} +UPDATE {{ Identifier $.TablePath }} + SET "checkpoint" = {{ Literal (Base64Std $.Checkpoint) }} + WHERE materialization = {{ Literal $.Materialization.String }} + AND key_begin = {{ $.KeyBegin }} + AND key_end = {{ $.KeyEnd }} + AND fence = {{ $.Fence }}; +{{ end }} +`) + tplCreateTargetTable = tplAll.Lookup("createTargetTable") + tplAlterTableColumns = tplAll.Lookup("alterTableColumns") + tplCreateMigrationTable = tplAll.Lookup("createMigrationTable") + tplCreateLoadTable = tplAll.Lookup("createLoadTable") + tplLoadQuery = tplAll.Lookup("loadQuery") + tplDropLoadTable = tplAll.Lookup("dropLoadTable") + tplStoreMergeQuery = tplAll.Lookup("storeMergeQuery") + tplStoreCopyIntoFromStagedQuery = tplAll.Lookup("storeCopyIntoFromStagedQuery") + tplStoreCopyIntoDirectQuery = tplAll.Lookup("storeCopyIntoDirectQuery") + tplUpdateFence = tplAll.Lookup("updateFence") +) diff --git a/materialize-azure-fabric-warehouse/sqlgen_test.go b/materialize-azure-fabric-warehouse/sqlgen_test.go new file mode 100644 index 0000000000..28f0ad8d79 --- /dev/null +++ b/materialize-azure-fabric-warehouse/sqlgen_test.go @@ -0,0 +1,105 @@ +package main + +import ( + "testing" + "text/template" + + "github.com/bradleyjkemp/cupaloy" + sql "github.com/estuary/connectors/materialize-sql" + "github.com/stretchr/testify/require" +) + +func TestSQLGeneration(t *testing.T) { + snap, tables := sql.RunSqlGenTests( + t, + dialect, + func(table string, delta bool) sql.Resource { + return tableConfig{ + Table: table, + Schema: "a-schema", + Delta: delta, + warehouse: "a-warehouse", + } + }, + sql.TestTemplates{ + TableTemplates: []*template.Template{ + tplCreateTargetTable, + }, + TplAddColumns: tplAlterTableColumns, + TplUpdateFence: tplUpdateFence, + }, + ) + + for _, tbl := range tables { + for _, tpl := range []*template.Template{ + tplStoreCopyIntoFromStagedQuery, + tplStoreCopyIntoDirectQuery, + } { + var testcase = tbl.Identifier + " " + tpl.Name() + + snap.WriteString("--- Begin " + testcase + " ---") + require.NoError(t, tpl.Execute(snap, &queryParams{ + Table: tbl, + URIs: []string{"https://some/file1", "https://some/file2"}, + StorageAccountKey: "some-storage-account-key", + })) + snap.WriteString("--- End " + testcase + " ---\n\n") + } + } + + for _, tpl := range []*template.Template{ + tplCreateLoadTable, + tplLoadQuery, + tplDropLoadTable, + tplStoreMergeQuery, + } { + tbl := tables[0] // these queries are never run for delta updates mode + var testcase = tbl.Identifier + " " + tpl.Name() + + snap.WriteString("--- Begin " + testcase + " ---") + require.NoError(t, tpl.Execute(snap, &queryParams{ + Table: tbl, + URIs: []string{"https://some/file1", "https://some/file2"}, + StorageAccountKey: "some-storage-account-key", + Bounds: []sql.MergeBound{ + { + Column: tbl.Keys[0], + LiteralLower: dialect.Literal(int64(10)), + LiteralUpper: dialect.Literal(int64(100)), + }, + { + Column: tbl.Keys[1], // boolean key + }, + { + Column: tbl.Keys[2], // binary key + }, + }, + })) + snap.WriteString("--- End " + testcase + " ---\n\n") + } + + { + params := migrateParams{ + SourceTable: "some_table", + TmpName: 
"some_table_tmp", + Columns: []migrateColumn{ + {Identifier: "not_migrated_column"}, + {Identifier: "is_migrated_column", CastSQL: "CAST(is_migrated_column AS VARCHAR(MAX))"}, + {Identifier: "another_not_migrated_column"}, + {Identifier: "migrated_boolean_column", CastSQL: bitToStringCast(sql.ColumnTypeMigration{ + Column: sql.Column{ + Identifier: "migrated_boolean_column", + MappedType: sql.MappedType{NullableDDL: "VARCHAR(MAX)"}, + }, + })}, + {Identifier: "yet_another_not_migrated_column"}, + }, + } + + snap.WriteString("--- Begin createMigrationTable") + require.NoError(t, tplCreateMigrationTable.Execute(snap, params)) + snap.WriteString("--- End createMigrationTable ---\n\n") + } + + cupaloy.SnapshotT(t, snap.String()) +} diff --git a/materialize-azure-fabric-warehouse/staged_file.go b/materialize-azure-fabric-warehouse/staged_file.go new file mode 100644 index 0000000000..21776f9e34 --- /dev/null +++ b/materialize-azure-fabric-warehouse/staged_file.go @@ -0,0 +1,82 @@ +package main + +import ( + "context" + "fmt" + "io" + "path" + "strings" + + "github.com/Azure/azure-sdk-for-go/sdk/storage/azblob" + boilerplate "github.com/estuary/connectors/materialize-boilerplate" + enc "github.com/estuary/connectors/materialize-boilerplate/stream-encode" + "golang.org/x/sync/errgroup" +) + +// Multiple files are loaded faster by COPY INTO than a single large file. +// Splitting files into 250MiB chunks (after compression) seems to work well +// enough for larger transactions. +const fileSizeLimit = 250 * 1024 * 1024 + +type azureBlobObject struct { + directory string + name string +} + +func (o azureBlobObject) blobName() string { + return path.Join(o.directory, o.name) +} + +func newFileClient(azClient *azblob.Client, container string, directory string) *stagedFileClient { + return &stagedFileClient{ + container: container, + directory: directory, + azClient: azClient, + } +} + +type stagedFileClient struct { + container string + directory string + azClient *azblob.Client +} + +func (s *stagedFileClient) NewEncoder(w io.WriteCloser, fields []string) boilerplate.Encoder { + return enc.NewCsvEncoder(w, fields, enc.WithCsvSkipHeaders()) +} + +func (s *stagedFileClient) NewObject(uuid string) azureBlobObject { + return azureBlobObject{directory: s.directory, name: uuid} +} + +func (s *stagedFileClient) URI(o azureBlobObject) string { + return s.azClient.URL() + path.Join(s.container, o.blobName()) +} + +func (s *stagedFileClient) UploadStream(ctx context.Context, o azureBlobObject, r io.Reader) error { + if _, err := s.azClient.UploadStream(ctx, s.container, o.blobName(), r, nil); err != nil { + return err + } + + return nil +} + +func (s *stagedFileClient) Delete(ctx context.Context, uris []string) error { + group, groupCtx := errgroup.WithContext(ctx) + group.SetLimit(5) + + for _, uri := range uris { + parts := strings.TrimPrefix(uri, s.azClient.URL()) + firstSlash := strings.Index(parts, "/") + container := parts[:firstSlash] + blobName := parts[firstSlash+1:] + group.Go(func() error { + if _, err := s.azClient.DeleteBlob(groupCtx, container, blobName, nil); err != nil { + return fmt.Errorf("deleting blob %q: %w", blobName, err) + } + return nil + }) + } + + return group.Wait() +} diff --git a/materialize-azure-fabric-warehouse/transactor.go b/materialize-azure-fabric-warehouse/transactor.go new file mode 100644 index 0000000000..8948a76e7d --- /dev/null +++ b/materialize-azure-fabric-warehouse/transactor.go @@ -0,0 +1,351 @@ +package main + +import ( + "context" + "fmt" + "strings" + + m 
"github.com/estuary/connectors/go/protocols/materialize" + boilerplate "github.com/estuary/connectors/materialize-boilerplate" + sql "github.com/estuary/connectors/materialize-sql" + pf "github.com/estuary/flow/go/protocols/flow" + pm "github.com/estuary/flow/go/protocols/materialize" + "github.com/segmentio/encoding/json" + log "github.com/sirupsen/logrus" + "go.gazette.dev/core/consumer/protocol" +) + +type binding struct { + target sql.Table + hasBinaryColumns bool + + load struct { + mergeBounds *sql.MergeBoundsBuilder + } + + store struct { + mustMerge bool + mergeBounds *sql.MergeBoundsBuilder + } +} + +type transactor struct { + cfg *config + + fence sql.Fence + + storeFiles *boilerplate.StagedFiles[azureBlobObject] + loadFiles *boilerplate.StagedFiles[azureBlobObject] + bindings []*binding + be *boilerplate.BindingEvents +} + +func newTransactor( + ctx context.Context, + ep *sql.Endpoint, + fence sql.Fence, + bindings []sql.Table, + open pm.Request_Open, + is *boilerplate.InfoSchema, + be *boilerplate.BindingEvents, +) (m.Transactor, *boilerplate.MaterializeOptions, error) { + cfg := ep.Config.(*config) + + azClient, err := cfg.storageClient() + if err != nil { + return nil, nil, fmt.Errorf("creating storage client: %w", err) + } + + storageClient := newFileClient(azClient, cfg.ContainerName, cfg.Directory) + + t := &transactor{ + cfg: cfg, + fence: fence, + be: be, + loadFiles: boilerplate.NewStagedFiles(storageClient, fileSizeLimit, false), + storeFiles: boilerplate.NewStagedFiles(storageClient, fileSizeLimit, true), + } + + for idx, target := range bindings { + t.loadFiles.AddBinding(idx, target.KeyNames()) + t.storeFiles.AddBinding(idx, target.ColumnNames()) + + hasBinaryColumns := false + for _, col := range target.Columns() { + if col.DDL == "VARBINARY(MAX)" { + hasBinaryColumns = true + } + } + + b := &binding{ + target: target, + hasBinaryColumns: hasBinaryColumns, + } + b.load.mergeBounds = sql.NewMergeBoundsBuilder(target.Keys, ep.Dialect.Literal) + b.store.mergeBounds = sql.NewMergeBoundsBuilder(target.Keys, ep.Dialect.Literal) + t.bindings = append(t.bindings, b) + } + + opts := &boilerplate.MaterializeOptions{ + ExtendedLogging: true, + AckSchedule: &boilerplate.AckScheduleOption{ + Config: cfg.Schedule, + Jitter: []byte(cfg.ConnectionString), + }, + DBTJobTrigger: &cfg.DBTJobTrigger, + } + + return t, opts, nil +} + +func (t *transactor) UnmarshalState(state json.RawMessage) error { return nil } +func (t *transactor) Acknowledge(ctx context.Context) (*pf.ConnectorState, error) { return nil, nil } + +func (t *transactor) Load(it *m.LoadIterator, loaded func(int, json.RawMessage) error) error { + var ctx = it.Context() + + hadLoads := false + for it.Next() { + hadLoads = true + b := t.bindings[it.Binding] + + if converted, err := b.target.ConvertKey(it.Key); err != nil { + return fmt.Errorf("converting Load key: %w", err) + } else if err = t.loadFiles.EncodeRow(ctx, it.Binding, converted); err != nil { + return fmt.Errorf("encoding Load key: %w", err) + } else { + b.load.mergeBounds.NextKey(converted) + } + } + if it.Err() != nil { + return it.Err() + } + + if !hadLoads { + return nil + } + + defer t.loadFiles.CleanupCurrentTransaction(ctx) + + t.be.StartedEvaluatingLoads() + db, err := t.cfg.db() + if err != nil { + return fmt.Errorf("creating db: %w", err) + } + defer db.Close() + + txn, err := db.BeginTx(ctx, nil) + if err != nil { + return fmt.Errorf("load BeginTx: %w", err) + } + defer txn.Rollback() + + var unionQueries []string + var dropQueries []string + for 
idx, b := range t.bindings {
+		if !t.loadFiles.Started(idx) {
+			continue
+		}
+
+		uris, err := t.loadFiles.Flush(idx)
+		if err != nil {
+			return fmt.Errorf("flushing load file: %w", err)
+		}
+
+		params := &queryParams{
+			Table:             b.target,
+			URIs:              uris,
+			StorageAccountKey: t.cfg.StorageAccountKey,
+			Bounds:            b.load.mergeBounds.Build(),
+		}
+
+		var createQuery strings.Builder
+		if err := tplCreateLoadTable.Execute(&createQuery, params); err != nil {
+			return fmt.Errorf("rendering create load table: %w", err)
+		} else if _, err := txn.ExecContext(ctx, createQuery.String()); err != nil {
+			log.WithField(
+				"query", redactedQuery(createQuery, t.cfg.StorageAccountKey),
+			).Error("create load table query failed")
+			return fmt.Errorf("creating load table: %w", err)
+		}
+
+		var loadQuery strings.Builder
+		if err := tplLoadQuery.Execute(&loadQuery, params); err != nil {
+			return fmt.Errorf("rendering load query: %w", err)
+		}
+		unionQueries = append(unionQueries, loadQuery.String())
+
+		dropQuery, err := sql.RenderTableTemplate(b.target, tplDropLoadTable)
+		if err != nil {
+			return fmt.Errorf("rendering drop load table: %w", err)
+		}
+		dropQueries = append(dropQueries, dropQuery)
+	}
+
+	q := strings.Join(unionQueries, "\nUNION ALL\n")
+	rows, err := txn.QueryContext(ctx, q)
+	if err != nil {
+		log.WithField("query", q).Error("load query failed")
+		return fmt.Errorf("querying load documents: %w", err)
+	}
+	defer rows.Close()
+	t.be.FinishedEvaluatingLoads()
+
+	for rows.Next() {
+		var binding int
+		var document string
+
+		if err = rows.Scan(&binding, &document); err != nil {
+			return fmt.Errorf("scanning load document: %w", err)
+		} else if err = loaded(binding, json.RawMessage(document)); err != nil {
+			return err
+		}
+	}
+	if err = rows.Err(); err != nil {
+		return fmt.Errorf("querying Loads: %w", err)
+	}
+
+	for _, q := range dropQueries {
+		if _, err := txn.ExecContext(ctx, q); err != nil {
+			log.WithField("query", q).Error("drop load table query failed")
+			return fmt.Errorf("dropping load table: %w", err)
+		}
+	}
+
+	if err := txn.Commit(); err != nil {
+		return fmt.Errorf("committing load transaction: %w", err)
+	} else if err := db.Close(); err != nil {
+		return fmt.Errorf("closing db: %w", err)
+	} else if err := t.loadFiles.CleanupCurrentTransaction(ctx); err != nil {
+		return fmt.Errorf("cleaning up temporary object files: %w", err)
+	}
+
+	return nil
+}
+
+func (t *transactor) Store(it *m.StoreIterator) (m.StartCommitFunc, error) {
+	ctx := it.Context()
+
+	for it.Next() {
+		if t.cfg.HardDelete && it.Delete && !it.Exists {
+			continue
+		}
+
+		b := t.bindings[it.Binding]
+		if it.Exists {
+			b.store.mustMerge = true
+		}
+
+		flowDocument := it.RawJSON
+		if t.cfg.HardDelete && it.Delete {
+			flowDocument = json.RawMessage(`"delete"`)
+		}
+
+		if converted, err := b.target.ConvertAll(it.Key, it.Values, flowDocument); err != nil {
+			return nil, fmt.Errorf("converting store parameters: %w", err)
+		} else if err := t.storeFiles.EncodeRow(ctx, it.Binding, converted); err != nil {
+			return nil, fmt.Errorf("encoding row for store: %w", err)
+		} else {
+			b.store.mergeBounds.NextKey(converted[:len(b.target.Keys)])
+		}
+	}
+	if it.Err() != nil {
+		return nil, it.Err()
+	}
+
+	return func(ctx context.Context, runtimeCheckpoint *protocol.Checkpoint) (*pf.ConnectorState, m.OpFuture) {
+		var err error
+		if t.fence.Checkpoint, err = runtimeCheckpoint.Marshal(); err != nil {
+			return nil, m.FinishedOperation(fmt.Errorf("marshalling checkpoint: %w", err))
+		}
+
+		var fenceUpdate strings.Builder
+		if err :=
tplUpdateFence.Execute(&fenceUpdate, t.fence); err != nil { + return nil, m.FinishedOperation(fmt.Errorf("evaluating fence template: %w", err)) + } + + return nil, m.RunAsyncOperation(func() error { + defer t.storeFiles.CleanupCurrentTransaction(ctx) + + db, err := t.cfg.db() + if err != nil { + return fmt.Errorf("creating db: %w", err) + } + defer db.Close() + + txn, err := db.BeginTx(ctx, nil) + if err != nil { + return fmt.Errorf("store BeginTx: %w", err) + } + defer txn.Rollback() + + for idx, b := range t.bindings { + if !t.storeFiles.Started(idx) { + continue + } + + uris, err := t.storeFiles.Flush(idx) + if err != nil { + return fmt.Errorf("flushing store file for binding[%d]: %w", idx, err) + } + + params := &queryParams{ + Table: b.target, + URIs: uris, + StorageAccountKey: t.cfg.StorageAccountKey, + Bounds: b.store.mergeBounds.Build(), + } + + t.be.StartedResourceCommit(b.target.Path) + if b.store.mustMerge { + var mergeQuery strings.Builder + if err := tplStoreMergeQuery.Execute(&mergeQuery, params); err != nil { + return err + } else if _, err := txn.ExecContext(ctx, mergeQuery.String()); err != nil { + log.WithField( + "query", redactedQuery(mergeQuery, t.cfg.StorageAccountKey), + ).Error("merge query failed") + return fmt.Errorf("executing store merge query for binding[%d]: %w", idx, err) + } + } else { + var copyIntoQuery strings.Builder + tpl := tplStoreCopyIntoDirectQuery + if b.hasBinaryColumns { + tpl = tplStoreCopyIntoFromStagedQuery + } + + if err := tpl.Execute(©IntoQuery, params); err != nil { + return err + } else if _, err := txn.ExecContext(ctx, copyIntoQuery.String()); err != nil { + log.WithField( + "query", redactedQuery(copyIntoQuery, t.cfg.StorageAccountKey), + ).Error("copy into query failed") + return fmt.Errorf("executing store copy into query for binding[%d]: %w", idx, err) + } + } + t.be.FinishedResourceCommit(b.target.Path) + b.store.mustMerge = false + } + + if res, err := txn.ExecContext(ctx, fenceUpdate.String()); err != nil { + return fmt.Errorf("updating checkpoints: %w", err) + } else if rows, err := res.RowsAffected(); err != nil { + return fmt.Errorf("getting fence update rows affected: %w", err) + } else if rows != 1 { + return fmt.Errorf("this instance was fenced off by another") + } else if err := txn.Commit(); err != nil { + return fmt.Errorf("committing store transaction: %w", err) + } else if err := t.storeFiles.CleanupCurrentTransaction(ctx); err != nil { + return fmt.Errorf("cleaning up temporary object files: %w", err) + } + + return nil + }) + }, nil +} + +func (t *transactor) Destroy() {} + +func redactedQuery(query strings.Builder, storageAccountKey string) string { + return strings.ReplaceAll(query.String(), storageAccountKey, "REDACTED") +} diff --git a/materialize-bigquery/.snapshots/TestValidateAndApplyMigrations b/materialize-bigquery/.snapshots/TestValidateAndApplyMigrations index 067652572e..d4c9cf353c 100644 --- a/materialize-bigquery/.snapshots/TestValidateAndApplyMigrations +++ b/materialize-bigquery/.snapshots/TestValidateAndApplyMigrations @@ -41,7 +41,7 @@ Migratable Changes Before Apply Schema: Migratable Changes Before Apply Data: key (STRING), _meta_flow_truncated (BOOLEAN), boolWidenedToJson (BOOLEAN), dateValue (DATE), datetimeValue (TIMESTAMP), flow_published_at (TIMESTAMP), int64 (INTEGER), intWidenedToJson (INTEGER), multiple (JSON), nonScalarValue (STRING), numericString (BIGNUMERIC), optional (STRING), requiredNumeric (BIGNUMERIC), scalarValue (STRING), stringWidenedToJson (STRING), timeValue (STRING), 
flow_document (STRING) -1, false, true, 2024-01-01, 2024-01-01 01:01:01.111111 +0000 UTC, 2024-09-13 01:01:01 +0000 UTC, 1, 999, , , 123/1, , 456/1, test, hello, 01:01:01, {} +1, false, true, 2024-01-01, 2024-01-01 01:01:01.111111 +0000 UTC, 2024-09-13 01:01:01 +0000 UTC, 1, 999, , , 12300000000000000000000000000000000000000/100000000000000000000000000000000000000, , 45600000000000000000000000000000000000000/100000000000000000000000000000000000000, test, hello, 01:01:01, {} Migratable Changes Constraints: {"Field":"_meta/flow_truncated","Type":3,"TypeString":"LOCATION_RECOMMENDED","Reason":"This location is part of the current materialization"} @@ -71,7 +71,7 @@ Migratable Changes Applied Schema: {"Name":"datetimeValue","Nullable":"YES","Type":"STRING"} {"Name":"flow_document","Nullable":"NO","Type":"STRING"} {"Name":"flow_published_at","Nullable":"NO","Type":"TIMESTAMP"} -{"Name":"int64","Nullable":"YES","Type":"INT64"} +{"Name":"int64","Nullable":"YES","Type":"BIGNUMERIC(38)"} {"Name":"intWidenedToJson","Nullable":"YES","Type":"JSON"} {"Name":"key","Nullable":"NO","Type":"STRING"} {"Name":"multiple","Nullable":"YES","Type":"JSON"} @@ -85,6 +85,6 @@ Migratable Changes Applied Schema: Migratable Changes Applied Data: -key (STRING), _meta_flow_truncated (BOOLEAN), flow_published_at (TIMESTAMP), int64 (INTEGER), multiple (JSON), nonScalarValue (STRING), optional (STRING), scalarValue (STRING), timeValue (STRING), flow_document (STRING), boolWidenedToJson (JSON), dateValue (STRING), datetimeValue (STRING), intWidenedToJson (JSON), numericString (STRING), requiredNumeric (STRING), stringWidenedToJson (JSON) -1, false, 2024-09-13 01:01:01 +0000 UTC, 1, , , , test, 01:01:01, {}, true, 2024-01-01, 2024-01-01T01:01:01.111111Z, 999, 123, 456, "hello" +key (STRING), _meta_flow_truncated (BOOLEAN), flow_published_at (TIMESTAMP), multiple (JSON), nonScalarValue (STRING), optional (STRING), scalarValue (STRING), timeValue (STRING), flow_document (STRING), boolWidenedToJson (JSON), dateValue (STRING), datetimeValue (STRING), int64 (BIGNUMERIC), intWidenedToJson (JSON), numericString (STRING), requiredNumeric (STRING), stringWidenedToJson (JSON) +1, false, 2024-09-13 01:01:01 +0000 UTC, , , , test, 01:01:01, {}, true, 2024-01-01, 2024-01-01T01:01:01.111111Z, 100000000000000000000000000000000000000/100000000000000000000000000000000000000, 999, 123, 456, "hello" diff --git a/materialize-bigquery/sqlgen.go b/materialize-bigquery/sqlgen.go index 8e35babb95..44866b7590 100644 --- a/materialize-bigquery/sqlgen.go +++ b/materialize-bigquery/sqlgen.go @@ -99,7 +99,10 @@ var bqDialect = func() sql.Dialect { return sql.Dialect{ MigratableTypes: sql.MigrationSpecs{ - "integer": {sql.NewMigrationSpec([]string{"string"})}, + "integer": { + sql.NewMigrationSpec([]string{"bignumeric(38,0)"}, sql.WithCastSQL(toBigNumericCast)), + sql.NewMigrationSpec([]string{"string"}), + }, "bignumeric": {sql.NewMigrationSpec([]string{"string"})}, "float": {sql.NewMigrationSpec([]string{"string"})}, "date": {sql.NewMigrationSpec([]string{"string"})}, @@ -140,6 +143,10 @@ func toJsonCast(migration sql.ColumnTypeMigration) string { return fmt.Sprintf(`TO_JSON(%s)`, migration.Identifier) } +func toBigNumericCast(m sql.ColumnTypeMigration) string { + return fmt.Sprintf("CAST(%s AS BIGNUMERIC)", m.Identifier) +} + var ( tplAll = sql.MustParseTemplate(bqDialect, "root", ` {{ define "tempTableName" -}} diff --git a/materialize-bigquery/sqlgen_test.go b/materialize-bigquery/sqlgen_test.go index 7996d7b743..3d49144907 100644 --- 
a/materialize-bigquery/sqlgen_test.go +++ b/materialize-bigquery/sqlgen_test.go @@ -43,17 +43,17 @@ func TestSQLGeneration(t *testing.T) { bounds := []sql.MergeBound{ { - Identifier: tbl.Keys[0].Identifier, + Column: tbl.Keys[0], LiteralLower: bqDialect.Literal(int64(10)), LiteralUpper: bqDialect.Literal(int64(100)), }, { - Identifier: tbl.Keys[1].Identifier, + Column: tbl.Keys[1], // No bounds - as would be the case for a boolean key, which // would be a very weird key, but technically allowed. }, { - Identifier: tbl.Keys[2].Identifier, + Column: tbl.Keys[2], LiteralLower: bqDialect.Literal("aGVsbG8K"), LiteralUpper: bqDialect.Literal("Z29vZGJ5ZQo="), }, diff --git a/materialize-boilerplate/staged_files.go b/materialize-boilerplate/staged_files.go new file mode 100644 index 0000000000..87c38e0094 --- /dev/null +++ b/materialize-boilerplate/staged_files.go @@ -0,0 +1,223 @@ +package boilerplate + +import ( + "context" + "fmt" + "io" + + "github.com/google/uuid" + "golang.org/x/sync/errgroup" +) + +// Encoder is any streaming encoder that can be used to write files. +type Encoder interface { + Encode(row []any) error + Written() int + Close() error +} + +// StagedFileStorageClient is a specific implementation of a client that +// interacts with a staging file system, usually an object store of some kind. +type StagedFileStorageClient[T any] interface { + // NewEncoder creates a new Encoder that writes rows to a writer. + NewEncoder(w io.WriteCloser, fields []string) Encoder + + // NewObject creates a new file object handle from a random UUID. The UUID + // should be part of the file name, and the specifics of the object + // construction are generic to the implementation. + NewObject(uuid string) T + + // URI creates an identifier from T. + URI(T) string + + // UploadStream streams data from a reader to the destination represented by + // the object T. For example, this may read bytes from r and write the bytes + // to a blob at a specific location represented by T. + UploadStream(ctx context.Context, object T, r io.Reader) error + + // Delete deletes the objects per the list of URIs. This is used for + // removing the temporary staged files after transactions are applied. + Delete(ctx context.Context, uris []string) error +} + +type StagedFiles[T any] struct { + client StagedFileStorageClient[T] + fileSizeLimit int + flushOnNextBinding bool + stagedFiles []stagedFile[T] + lastBinding int + didCleanupCurrent bool +} + +// NewStagedFiles creates a StagedFiles instance, which is used for staging data +// on a remote system for later committing to a materialized destination. +// +// fileSizeLimit controls how large the staged files are in bytes. If there is a +// lot of data to stage for a single binding, the data will be split into +// multiple files with each one being approximately fileSizeLimit in size. +// +// flushOnNextBinding can be set to flush the file stream whenever a new binding +// (by index) has a row encoded for it. Flushing the file stream will result in +// the current streaming encoder being closed and flushed, which concludes the +// current file being written. Any further writes to that same binding will +// start a new file, so this should usually only be enabled for encoding rows +// received from Store requests, where the documents are always in monotonic +// order with respect to their binding index. 
+func NewStagedFiles[T any](client StagedFileStorageClient[T], fileSizeLimit int, flushOnNextBinding bool) *StagedFiles[T] {
+	return &StagedFiles[T]{
+		client:             client,
+		fileSizeLimit:      fileSizeLimit,
+		flushOnNextBinding: flushOnNextBinding,
+		lastBinding:        -1,
+	}
+}
+
+// AddBinding adds a binding. Bindings must be added in order of their binding
+// index, starting from 0.
+func (sf *StagedFiles[T]) AddBinding(binding int, fields []string) {
+	if binding != len(sf.stagedFiles) {
+		panic("bindings must be added in monotonically increasing order starting with binding 0")
+	}
+
+	sf.stagedFiles = append(sf.stagedFiles, stagedFile[T]{
+		client:        sf.client,
+		fileSizeLimit: sf.fileSizeLimit,
+		fields:        fields,
+	})
+}
+
+// EncodeRow encodes a row of data for the binding.
+func (sf *StagedFiles[T]) EncodeRow(ctx context.Context, binding int, row []any) error {
+	if sf.flushOnNextBinding && sf.lastBinding != -1 && binding != sf.lastBinding {
+		if err := sf.stagedFiles[sf.lastBinding].flushFile(); err != nil {
+			return fmt.Errorf("flushing prior binding [%d]: %w", sf.lastBinding, err)
+		}
+	}
+	sf.lastBinding = binding
+	sf.didCleanupCurrent = false
+
+	return sf.stagedFiles[binding].encodeRow(ctx, row)
+}
+
+// Flush flushes the current encoder and closes the file.
+func (sf *StagedFiles[T]) Flush(binding int) ([]string, error) {
+	return sf.stagedFiles[binding].flush()
+}
+
+// CleanupCurrentTransaction attempts to delete all of the known staged files
+// for the current transaction. It is safe to call multiple times, and if called
+// again will not re-attempt to delete the same files. This makes it convenient
+// to use both as part of a deferred call and as an inline call to check for
+// deletion errors.
+func (sf *StagedFiles[T]) CleanupCurrentTransaction(ctx context.Context) error {
+	if !sf.didCleanupCurrent {
+		sf.didCleanupCurrent = true
+
+		var uris []string
+		for _, f := range sf.stagedFiles {
+			for _, u := range f.uploaded {
+				uris = append(uris, sf.client.URI(u))
+			}
+		}
+
+		return sf.client.Delete(ctx, uris)
+	}
+
+	return nil
+}
+
+// CleanupCheckpoint deletes files specified in the list of URIs. This can be
+// used to clean up files that have been staged for a materialization using the
+// post-commit apply pattern, where staged files must remain across connector
+// restarts.
+func (sf *StagedFiles[T]) CleanupCheckpoint(ctx context.Context, uris []string) error {
+	return sf.client.Delete(ctx, uris)
+}
+
+// Started indicates if any rows were encoded for the binding during this
+// transaction.
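+//
+// The Load and Store paths in this connector use Started to skip bindings
+// that staged no rows before flushing files for them.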
+func (sf *StagedFiles[T]) Started(binding int) bool { + return sf.stagedFiles[binding].started +} + +type stagedFile[T any] struct { + client StagedFileStorageClient[T] + fileSizeLimit int + fields []string + encoder Encoder + group *errgroup.Group + started bool + uploaded []T +} + +func (f *stagedFile[T]) encodeRow(ctx context.Context, row []any) error { + if !f.started { + f.uploaded = nil + f.started = true + } + + if f.encoder == nil { + f.newFile(ctx) + } + + if err := f.encoder.Encode(row); err != nil { + return fmt.Errorf("encoding row: %w", err) + } + + if f.encoder.Written() >= f.fileSizeLimit { + if err := f.flushFile(); err != nil { + return err + } + } + + return nil +} + +func (f *stagedFile[T]) flush() ([]string, error) { + if err := f.flushFile(); err != nil { + return nil, err + } + + var uploaded []string + for _, obj := range f.uploaded { + uploaded = append(uploaded, f.client.URI(obj)) + } + f.started = false + + return uploaded, nil +} + +func (f *stagedFile[T]) newFile(ctx context.Context) { + r, w := io.Pipe() + f.encoder = f.client.NewEncoder(w, f.fields) + + group, groupCtx := errgroup.WithContext(ctx) + f.group = group + obj := f.client.NewObject(uuid.NewString()) + f.uploaded = append(f.uploaded, obj) + + f.group.Go(func() error { + if err := f.client.UploadStream(groupCtx, obj, r); err != nil { + r.CloseWithError(err) + return fmt.Errorf("uploading file: %w", err) + } + + return nil + }) +} + +func (f *stagedFile[T]) flushFile() error { + if f.encoder == nil { + return nil + } + + if err := f.encoder.Close(); err != nil { + return fmt.Errorf("closing encoder: %w", err) + } else if err := f.group.Wait(); err != nil { + return err + } + + f.encoder = nil + + return nil +} diff --git a/materialize-boilerplate/stream-encode/.snapshots/TestCsvEncoder-with_custom_delimiter b/materialize-boilerplate/stream-encode/.snapshots/TestCsvEncoder-with_custom_delimiter deleted file mode 100644 index adfe7cf5fd..0000000000 --- a/materialize-boilerplate/stream-encode/.snapshots/TestCsvEncoder-with_custom_delimiter +++ /dev/null @@ -1,12 +0,0 @@ -intField,numField,boolField,binaryField,stringField,uuidField,jsonField,dateField,timeField,timestampField,intervalField -,0.9090909090909091,false,c3RyXzE=,str_1,38373433-3437-6136-6339-636264386136,"{""first"":10001,""second"":10002,""third"":10003}",2007-02-03,15:04:07.499999999Z,2007-02-03 15:04:07.499999,P1Y1M1W1DT1H1M1S -2,,true,c3RyXzI=,str_2,66643364-3036-3934-6335-303634316664,"{""first"":20001,""second"":20002,""third"":20003}",2008-03-04,15:04:08.999999999Z,2008-03-04 15:04:08.999999,P2Y2M2W2DT2H2M2S -3,2.727272727272727,,c3RyXzM=,str_3,36333263-3361-3830-3762-336566393933,"{""first"":30001,""second"":30002,""third"":30003}",2009-04-05,15:04:10.499999999Z,2009-04-05 15:04:10.499999,P3Y3M3W3DT3H3M3S -4,3.6363636363636362,true,,str_4,61323033-6362-3130-3733-616630363737,"{""first"":40001,""second"":40002,""third"":40003}",2010-05-06,15:04:11.999999999Z,2010-05-06 15:04:11.999999,P4Y4M4DT4H4M4S -5,4.545454545454545,false,c3RyXzU=,,65613135-3538-3261-3738-636663613334,"{""first"":50001,""second"":50002,""third"":50003}",2011-06-07,15:04:13.499999999Z,2011-06-07 15:04:13.499999,P5Y5M1W5DT5H5M5S -6,5.454545454545454,true,c3RyXzY=,str_6,,"{""first"":60001,""second"":60002,""third"":60003}",2012-07-08,15:04:14.999999999Z,2012-07-08 15:04:14.999999,P6Y6M2W6DT6H6M6S -7,6.363636363636363,false,c3RyXzc=,str_7,63313061-3333-3638-6630-636462323038,,2013-08-09,15:04:16.499999999Z,2013-08-09 15:04:16.499999,P7Y7M3W7DT7H7M7S 
-8,7.2727272727272725,true,c3RyXzg=,str_8,33393265-3036-3339-3633-636331643962,"{""first"":80001,""second"":80002,""third"":80003}",,15:04:17.999999999Z,2014-09-10 15:04:17.999999,P8Y8M8DT8H8M8S -9,8.181818181818182,false,c3RyXzk=,str_9,37396661-6562-6461-6433-323664383133,"{""first"":90001,""second"":90002,""third"":90003}",2015-10-11,,2015-10-11 15:04:19.499999,P9Y9M1W9DT9H9M9S -10,9.09090909090909,true,c3RyXzEw,str_10,33353133-3736-3334-3934-336265336235,"{""first"":100001,""second"":100002,""third"":100003}",2016-11-12,15:04:20.999999999Z,,P10Y10M2W10DT10H10M10S - diff --git a/materialize-boilerplate/stream-encode/.snapshots/TestCsvEncoder-with_custom_null b/materialize-boilerplate/stream-encode/.snapshots/TestCsvEncoder-with_custom_null deleted file mode 100644 index a949aca020..0000000000 --- a/materialize-boilerplate/stream-encode/.snapshots/TestCsvEncoder-with_custom_null +++ /dev/null @@ -1,12 +0,0 @@ -intField,numField,boolField,binaryField,stringField,uuidField,jsonField,dateField,timeField,timestampField,intervalField -MyNullString,0.9090909090909091,false,c3RyXzE=,str_1,38373433-3437-6136-6339-636264386136,"{""first"":10001,""second"":10002,""third"":10003}",2007-02-03,15:04:07.499999999Z,2007-02-03T15:04:07.499999999Z,P1Y1M1W1DT1H1M1S -2,MyNullString,true,c3RyXzI=,str_2,66643364-3036-3934-6335-303634316664,"{""first"":20001,""second"":20002,""third"":20003}",2008-03-04,15:04:08.999999999Z,2008-03-04T15:04:08.999999999Z,P2Y2M2W2DT2H2M2S -3,2.727272727272727,MyNullString,c3RyXzM=,str_3,36333263-3361-3830-3762-336566393933,"{""first"":30001,""second"":30002,""third"":30003}",2009-04-05,15:04:10.499999999Z,2009-04-05T15:04:10.499999999Z,P3Y3M3W3DT3H3M3S -4,3.6363636363636362,true,MyNullString,str_4,61323033-6362-3130-3733-616630363737,"{""first"":40001,""second"":40002,""third"":40003}",2010-05-06,15:04:11.999999999Z,2010-05-06T15:04:11.999999999Z,P4Y4M4DT4H4M4S -5,4.545454545454545,false,c3RyXzU=,MyNullString,65613135-3538-3261-3738-636663613334,"{""first"":50001,""second"":50002,""third"":50003}",2011-06-07,15:04:13.499999999Z,2011-06-07T15:04:13.499999999Z,P5Y5M1W5DT5H5M5S -6,5.454545454545454,true,c3RyXzY=,str_6,MyNullString,"{""first"":60001,""second"":60002,""third"":60003}",2012-07-08,15:04:14.999999999Z,2012-07-08T15:04:14.999999999Z,P6Y6M2W6DT6H6M6S -7,6.363636363636363,false,c3RyXzc=,str_7,63313061-3333-3638-6630-636462323038,MyNullString,2013-08-09,15:04:16.499999999Z,2013-08-09T15:04:16.499999999Z,P7Y7M3W7DT7H7M7S -8,7.2727272727272725,true,c3RyXzg=,str_8,33393265-3036-3339-3633-636331643962,"{""first"":80001,""second"":80002,""third"":80003}",MyNullString,15:04:17.999999999Z,2014-09-10T15:04:17.999999999Z,P8Y8M8DT8H8M8S -9,8.181818181818182,false,c3RyXzk=,str_9,37396661-6562-6461-6433-323664383133,"{""first"":90001,""second"":90002,""third"":90003}",2015-10-11,MyNullString,2015-10-11T15:04:19.499999999Z,P9Y9M1W9DT9H9M9S -10,9.09090909090909,true,c3RyXzEw,str_10,33353133-3736-3334-3934-336265336235,"{""first"":100001,""second"":100002,""third"":100003}",2016-11-12,15:04:20.999999999Z,MyNullString,P10Y10M2W10DT10H10M10S - diff --git a/materialize-boilerplate/stream-encode/.snapshots/TestCsvEncoder-with_default_null b/materialize-boilerplate/stream-encode/.snapshots/TestCsvEncoder-with_default_null deleted file mode 100644 index adfe7cf5fd..0000000000 --- a/materialize-boilerplate/stream-encode/.snapshots/TestCsvEncoder-with_default_null +++ /dev/null @@ -1,12 +0,0 @@ 
-intField,numField,boolField,binaryField,stringField,uuidField,jsonField,dateField,timeField,timestampField,intervalField -,0.9090909090909091,false,c3RyXzE=,str_1,38373433-3437-6136-6339-636264386136,"{""first"":10001,""second"":10002,""third"":10003}",2007-02-03,15:04:07.499999999Z,2007-02-03 15:04:07.499999,P1Y1M1W1DT1H1M1S -2,,true,c3RyXzI=,str_2,66643364-3036-3934-6335-303634316664,"{""first"":20001,""second"":20002,""third"":20003}",2008-03-04,15:04:08.999999999Z,2008-03-04 15:04:08.999999,P2Y2M2W2DT2H2M2S -3,2.727272727272727,,c3RyXzM=,str_3,36333263-3361-3830-3762-336566393933,"{""first"":30001,""second"":30002,""third"":30003}",2009-04-05,15:04:10.499999999Z,2009-04-05 15:04:10.499999,P3Y3M3W3DT3H3M3S -4,3.6363636363636362,true,,str_4,61323033-6362-3130-3733-616630363737,"{""first"":40001,""second"":40002,""third"":40003}",2010-05-06,15:04:11.999999999Z,2010-05-06 15:04:11.999999,P4Y4M4DT4H4M4S -5,4.545454545454545,false,c3RyXzU=,,65613135-3538-3261-3738-636663613334,"{""first"":50001,""second"":50002,""third"":50003}",2011-06-07,15:04:13.499999999Z,2011-06-07 15:04:13.499999,P5Y5M1W5DT5H5M5S -6,5.454545454545454,true,c3RyXzY=,str_6,,"{""first"":60001,""second"":60002,""third"":60003}",2012-07-08,15:04:14.999999999Z,2012-07-08 15:04:14.999999,P6Y6M2W6DT6H6M6S -7,6.363636363636363,false,c3RyXzc=,str_7,63313061-3333-3638-6630-636462323038,,2013-08-09,15:04:16.499999999Z,2013-08-09 15:04:16.499999,P7Y7M3W7DT7H7M7S -8,7.2727272727272725,true,c3RyXzg=,str_8,33393265-3036-3339-3633-636331643962,"{""first"":80001,""second"":80002,""third"":80003}",,15:04:17.999999999Z,2014-09-10 15:04:17.999999,P8Y8M8DT8H8M8S -9,8.181818181818182,false,c3RyXzk=,str_9,37396661-6562-6461-6433-323664383133,"{""first"":90001,""second"":90002,""third"":90003}",2015-10-11,,2015-10-11 15:04:19.499999,P9Y9M1W9DT9H9M9S -10,9.09090909090909,true,c3RyXzEw,str_10,33353133-3736-3334-3934-336265336235,"{""first"":100001,""second"":100002,""third"":100003}",2016-11-12,15:04:20.999999999Z,,P10Y10M2W10DT10H10M10S - diff --git a/materialize-boilerplate/stream-encode/csv.go b/materialize-boilerplate/stream-encode/csv.go index 6fec92c982..fed2ae391d 100644 --- a/materialize-boilerplate/stream-encode/csv.go +++ b/materialize-boilerplate/stream-encode/csv.go @@ -2,11 +2,13 @@ package stream_encode import ( "compress/flate" - "encoding/csv" "encoding/json" "fmt" "io" "strconv" + "strings" + "unicode" + "unicode/utf8" "github.com/klauspost/compress/gzip" ) @@ -15,14 +17,12 @@ const csvCompressionlevel = flate.BestSpeed type csvConfig struct { skipHeaders bool - nullStr string - delimiter rune } type CsvEncoder struct { cfg csvConfig fields []string - csv *csv.Writer + csv *csvWriter cwc *countingWriteCloser gz *gzip.Writer } @@ -35,18 +35,6 @@ func WithCsvSkipHeaders() CsvOption { } } -func WithCsvNullString(str string) CsvOption { - return func(cfg *csvConfig) { - cfg.nullStr = str - } -} - -func WithCsvDelimiter(r rune) CsvOption { - return func(cfg *csvConfig) { - cfg.delimiter = r - } -} - func NewCsvEncoder(w io.WriteCloser, fields []string, opts ...CsvOption) *CsvEncoder { var cfg csvConfig for _, o := range opts { @@ -60,14 +48,9 @@ func NewCsvEncoder(w io.WriteCloser, fields []string, opts ...CsvOption) *CsvEnc panic("invalid compression level for gzip.NewWriterLevel") } - csvw := csv.NewWriter(gz) - if cfg.delimiter != 0 { - csvw.Comma = cfg.delimiter - } - return &CsvEncoder{ cfg: cfg, - csv: csvw, + csv: newCsvWriter(gz), cwc: cwc, gz: gz, fields: fields, @@ -76,56 +59,134 @@ func NewCsvEncoder(w io.WriteCloser, 
fields []string, opts ...CsvOption) *CsvEnc func (e *CsvEncoder) Encode(row []any) error { if !e.cfg.skipHeaders { - if err := e.csv.Write(e.fields); err != nil { + headerRow := make([]any, len(e.fields)) + for i, f := range e.fields { + headerRow[i] = f + } + if err := e.csv.writeRow(headerRow); err != nil { return fmt.Errorf("writing header: %w", err) } e.cfg.skipHeaders = true } - record := make([]string, 0, len(row)) + return e.csv.writeRow(row) +} + +func (e *CsvEncoder) Written() int { + return e.cwc.written +} + +func (e *CsvEncoder) Close() error { + if err := e.gz.Close(); err != nil { + return fmt.Errorf("closing gzip writer: %w", err) + } else if err := e.cwc.Close(); err != nil { + return fmt.Errorf("closing counting writer: %w", err) + } + + return nil +} + +type csvWriter struct { + w io.Writer +} + +func newCsvWriter(w io.Writer) *csvWriter { + return &csvWriter{w: w} +} - for _, v := range row { +func (w *csvWriter) writeRow(row []any) error { + for n, v := range row { + if n > 0 { + if _, err := w.w.Write([]byte(",")); err != nil { + return err + } + } + + var field string switch value := v.(type) { case json.RawMessage: - record = append(record, string(value)) + field = string(value) case []byte: - record = append(record, string(value)) + field = string(value) case string: - record = append(record, value) + field = value case bool: - record = append(record, strconv.FormatBool(value)) + field = strconv.FormatBool(value) case int64: - record = append(record, strconv.Itoa(int(value))) + field = strconv.Itoa(int(value)) case int: - record = append(record, strconv.Itoa(value)) + field = strconv.Itoa(value) case float64: - record = append(record, strconv.FormatFloat(value, 'f', -1, 64)) + field = strconv.FormatFloat(value, 'f', -1, 64) case float32: - record = append(record, strconv.FormatFloat(float64(value), 'f', -1, 64)) + field = strconv.FormatFloat(float64(value), 'f', -1, 64) case nil: - record = append(record, e.cfg.nullStr) + continue default: - record = append(record, fmt.Sprintf("%v", value)) + field = fmt.Sprintf("%v", value) + } + + if err := w.writeField(field); err != nil { + return err } } - return e.csv.Write(record) + if _, err := w.w.Write([]byte("\n")); err != nil { + return err + } + + return nil } -func (e *CsvEncoder) Written() int { - return e.cwc.written +func (w *csvWriter) writeField(field string) error { + if !w.fieldNeedsQuotes(field) { + if _, err := w.w.Write([]byte(field)); err != nil { + return err + } + } else { + if _, err := w.w.Write([]byte(`"`)); err != nil { + return err + } + for len(field) > 0 { + // Escape quote characters present in the string by replacing them + // with double quotes. 
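+			// For example, the field `has"quote` is written as `"has""quote"`,
+			// per the standard CSV quoting rule (RFC 4180).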
+ i := strings.Index(field, `"`) + if i < 0 { + i = len(field) + } + + if _, err := w.w.Write([]byte(field[:i])); err != nil { + return err + } + + field = field[i:] + if len(field) > 0 { + if _, err := w.w.Write([]byte(`""`)); err != nil { + return err + } + field = field[1:] + } + } + if _, err := w.w.Write([]byte(`"`)); err != nil { + return err + } + } + + return nil } -func (e *CsvEncoder) Close() error { - e.csv.Flush() +func (w *csvWriter) fieldNeedsQuotes(field string) bool { + if field == "" { + return true + } - if err := e.csv.Error(); err != nil { - return fmt.Errorf("flushing csv writer: %w", err) - } else if err := e.gz.Close(); err != nil { - return fmt.Errorf("closing gzip writer: %w", err) - } else if err := e.cwc.Close(); err != nil { - return fmt.Errorf("closing counting writer: %w", err) + for i := 0; i < len(field); i++ { + c := field[i] + if c == '\n' || c == '\r' || c == '"' || c == ',' { + return true + } } - return nil + r1, _ := utf8.DecodeRuneInString(field) + return unicode.IsSpace(r1) } diff --git a/materialize-boilerplate/stream-encode/csv_test.go b/materialize-boilerplate/stream-encode/csv_test.go index 855c57e561..90586a0f5a 100644 --- a/materialize-boilerplate/stream-encode/csv_test.go +++ b/materialize-boilerplate/stream-encode/csv_test.go @@ -1,6 +1,7 @@ package stream_encode import ( + "bytes" "os" "testing" @@ -24,21 +25,6 @@ func TestCsvEncoder(t *testing.T) { nulls: false, opts: []CsvOption{WithCsvSkipHeaders()}, }, - { - name: "with default null", - nulls: true, - opts: nil, - }, - { - name: "with custom null", - nulls: true, - opts: []CsvOption{WithCsvNullString("MyNullString")}, - }, - { - name: "with custom delimiter", - nulls: true, - opts: []CsvOption{WithCsvDelimiter([]rune("|")[0])}, - }, } for _, tt := range tests { @@ -61,3 +47,39 @@ func TestCsvEncoder(t *testing.T) { }) } } + +func TestCsvWriter(t *testing.T) { + for _, tt := range []struct { + name string + row []any + want string + }{ + { + name: "empty", + row: nil, + want: "\n", + }, + { + name: "basic", + row: []any{"first", "second", "third"}, + want: "first,second,third\n", + }, + { + name: "empty string and null", + row: []any{"first", "", nil}, + want: "first,\"\",\n", + }, + { + name: "special characters", + row: []any{"has\nnewline", " startsWithSpace", "\tstartsWithTab", "has\"quote", "has,comma", "has\rreturn"}, + want: "\"has\nnewline\",\" startsWithSpace\",\"\tstartsWithTab\",\"has\"\"quote\",\"has,comma\",\"has\rreturn\"\n", + }, + } { + t.Run(tt.name, func(t *testing.T) { + var buf bytes.Buffer + csvw := newCsvWriter(&buf) + require.NoError(t, csvw.writeRow(tt.row)) + require.Equal(t, tt.want, buf.String()) + }) + } +} diff --git a/materialize-databricks/.snapshots/TestValidateAndApplyMigrations b/materialize-databricks/.snapshots/TestValidateAndApplyMigrations index 27405c2311..8bc5eef39c 100644 --- a/materialize-databricks/.snapshots/TestValidateAndApplyMigrations +++ b/materialize-databricks/.snapshots/TestValidateAndApplyMigrations @@ -71,7 +71,7 @@ Migratable Changes Applied Schema: {"Name":"datetimeValue","Nullable":"YES","Type":"STRING"} {"Name":"flow_document","Nullable":"NO","Type":"STRING"} {"Name":"flow_published_at","Nullable":"NO","Type":"TIMESTAMP"} -{"Name":"int64","Nullable":"YES","Type":"LONG"} +{"Name":"int64","Nullable":"YES","Type":"DECIMAL"} {"Name":"intWidenedToJson","Nullable":"YES","Type":"STRING"} {"Name":"key","Nullable":"NO","Type":"STRING"} {"Name":"multiple","Nullable":"YES","Type":"STRING"} @@ -85,6 +85,6 @@ Migratable Changes Applied Schema: 
Migratable Changes Applied Data: -key (STRING), _meta/flow_truncated (BOOLEAN), boolWidenedToJson (BOOLEAN), flow_published_at (TIMESTAMP), int64 (BIGINT), multiple (STRING), nonScalarValue (STRING), optional (STRING), scalarValue (STRING), stringWidenedToJson (STRING), timeValue (STRING), flow_document (STRING), dateValue (STRING), datetimeValue (STRING), intWidenedToJson (STRING), numericString (STRING), requiredNumeric (STRING) -1, false, true, 2024-09-13T01:01:01Z, 1, , , , test, hello, 01:01:01, {}, 2024-01-01, 2024-01-01T01:01:01.111111000Z, 999, 123, 456 +key (STRING), _meta/flow_truncated (BOOLEAN), boolWidenedToJson (BOOLEAN), flow_published_at (TIMESTAMP), multiple (STRING), nonScalarValue (STRING), optional (STRING), scalarValue (STRING), stringWidenedToJson (STRING), timeValue (STRING), flow_document (STRING), dateValue (STRING), datetimeValue (STRING), int64 (DECIMAL), intWidenedToJson (STRING), numericString (STRING), requiredNumeric (STRING) +1, false, true, 2024-09-13T01:01:01Z, , , , test, hello, 01:01:01, {}, 2024-01-01, 2024-01-01T01:01:01.111111000Z, 1, 999, 123, 456 diff --git a/materialize-databricks/sqlgen.go b/materialize-databricks/sqlgen.go index 783507d8da..330ebe7d6e 100644 --- a/materialize-databricks/sqlgen.go +++ b/materialize-databricks/sqlgen.go @@ -62,7 +62,7 @@ var databricksDialect = func() sql.Dialect { return sql.Dialect{ MigratableTypes: sql.MigrationSpecs{ "decimal": {sql.NewMigrationSpec([]string{"string"})}, - "long": {sql.NewMigrationSpec([]string{"string"})}, + "long": {sql.NewMigrationSpec([]string{"numeric(38,0)", "string"})}, "double": {sql.NewMigrationSpec([]string{"string"})}, "timestamp": {sql.NewMigrationSpec([]string{"string"}, sql.WithCastSQL(datetimeToStringCast))}, "date": {sql.NewMigrationSpec([]string{"string"})}, diff --git a/materialize-gcs-csv/.snapshots/TestSpec b/materialize-gcs-csv/.snapshots/TestSpec index 8db617ee94..29c847b193 100644 --- a/materialize-gcs-csv/.snapshots/TestSpec +++ b/materialize-gcs-csv/.snapshots/TestSpec @@ -44,18 +44,6 @@ }, "csvConfig": { "properties": { - "delimiter": { - "type": "string", - "title": "Delimiter", - "description": "Character to separate columns within a row. Defaults to a comma if blank. Must be a single character with a byte length of 1.", - "order": 0 - }, - "nullString": { - "type": "string", - "title": "Null String", - "description": "String to use to represent NULL values. 
Defaults to an empty string if blank.", - "order": 1 - }, "skipHeaders": { "type": "boolean", "title": "Skip Headers", diff --git a/materialize-motherduck/.snapshots/TestValidateAndApplyMigrations b/materialize-motherduck/.snapshots/TestValidateAndApplyMigrations index db79a0bbba..7a18d47479 100644 --- a/materialize-motherduck/.snapshots/TestValidateAndApplyMigrations +++ b/materialize-motherduck/.snapshots/TestValidateAndApplyMigrations @@ -41,8 +41,8 @@ Migratable Changes Before Apply Schema: Migratable Changes Before Apply Data: -key (VARCHAR), _meta/flow_truncated (BOOLEAN), boolWidenedToJson (BOOLEAN), dateValue (DATE), datetimeValue (TIMESTAMPTZ), flow_document (VARCHAR), flow_published_at (TIMESTAMPTZ), int64 (BIGINT), intWidenedToJson (BIGINT), multiple (VARCHAR), nonScalarValue (VARCHAR), numericString (HUGEINT), optional (VARCHAR), requiredNumeric (HUGEINT), scalarValue (VARCHAR), stringWidenedToJson (VARCHAR), timeValue (TIME), second_root (VARCHAR) -1, false, true, 2024-01-01T00:00:00Z, 2024-01-01T01:01:01.111111Z, {}, 2024-09-13T01:01:01Z, 1, 999, , , 123, , 456, test, hello, 1970-01-01T01:01:01Z, {} +key (VARCHAR), _meta/flow_truncated (BOOLEAN), boolWidenedToJson (BOOLEAN), dateValue (DATE), datetimeValue (TIMESTAMPTZ), flow_published_at (TIMESTAMPTZ), int64 (BIGINT), intWidenedToJson (BIGINT), multiple (VARCHAR), nonScalarValue (VARCHAR), numericString (HUGEINT), optional (VARCHAR), requiredNumeric (HUGEINT), scalarValue (VARCHAR), second_root (VARCHAR), stringWidenedToJson (VARCHAR), timeValue (TIME), flow_document (VARCHAR) +1, false, true, 2024-01-01T00:00:00Z, 2024-01-01T01:01:01.111111Z, 2024-09-13T01:01:01Z, 1, 999, , , 123, , 456, test, {}, hello, 1970-01-01T01:01:01Z, {} Migratable Changes Constraints: {"Field":"_meta/flow_truncated","Type":3,"TypeString":"LOCATION_RECOMMENDED","Reason":"This location is part of the current materialization"} @@ -72,7 +72,7 @@ Migratable Changes Applied Schema: {"Name":"datetimeValue","Nullable":"YES","Type":"TIMESTAMP WITH TIME ZONE"} {"Name":"flow_document","Nullable":"NO","Type":"JSON"} {"Name":"flow_published_at","Nullable":"NO","Type":"TIMESTAMP WITH TIME ZONE"} -{"Name":"int64","Nullable":"YES","Type":"BIGINT"} +{"Name":"int64","Nullable":"YES","Type":"HUGEINT"} {"Name":"intWidenedToJson","Nullable":"YES","Type":"JSON"} {"Name":"key","Nullable":"NO","Type":"VARCHAR"} {"Name":"multiple","Nullable":"YES","Type":"JSON"} @@ -87,6 +87,6 @@ Migratable Changes Applied Schema: Migratable Changes Applied Data: -key (VARCHAR), _meta/flow_truncated (BOOLEAN), datetimeValue (TIMESTAMPTZ), flow_document (VARCHAR), flow_published_at (TIMESTAMPTZ), int64 (BIGINT), multiple (VARCHAR), nonScalarValue (VARCHAR), optional (VARCHAR), scalarValue (VARCHAR), second_root (VARCHAR), boolWidenedToJson (VARCHAR), dateValue (VARCHAR), intWidenedToJson (VARCHAR), numericString (VARCHAR), requiredNumeric (VARCHAR), stringWidenedToJson (VARCHAR), timeValue (VARCHAR) -1, false, 2024-01-01T01:01:01.111111Z, {}, 2024-09-13T01:01:01Z, 1, , , , test, {}, true, 2024-01-01, 999, 123, 456, "hello", 01:01:01 +key (VARCHAR), _meta/flow_truncated (BOOLEAN), datetimeValue (TIMESTAMPTZ), flow_published_at (TIMESTAMPTZ), multiple (VARCHAR), nonScalarValue (VARCHAR), optional (VARCHAR), scalarValue (VARCHAR), second_root (VARCHAR), flow_document (VARCHAR), boolWidenedToJson (VARCHAR), dateValue (VARCHAR), int64 (HUGEINT), intWidenedToJson (VARCHAR), numericString (VARCHAR), requiredNumeric (VARCHAR), stringWidenedToJson (VARCHAR), timeValue (VARCHAR) +1, false, 
2024-01-01T01:01:01.111111Z, 2024-09-13T01:01:01Z, , , , test, {}, {}, true, 2024-01-01, 1, 999, 123, 456, "hello", 01:01:01 diff --git a/materialize-motherduck/sqlgen.go b/materialize-motherduck/sqlgen.go index f6d68df7a0..8b597fbedd 100644 --- a/materialize-motherduck/sqlgen.go +++ b/materialize-motherduck/sqlgen.go @@ -47,7 +47,7 @@ var duckDialect = func() sql.Dialect { return sql.Dialect{ MigratableTypes: sql.MigrationSpecs{ "double": {sql.NewMigrationSpec([]string{"varchar"})}, - "bigint": {sql.NewMigrationSpec([]string{"varchar"})}, + "bigint": {sql.NewMigrationSpec([]string{"hugeint", "varchar"})}, "hugeint": {sql.NewMigrationSpec([]string{"varchar"})}, "date": {sql.NewMigrationSpec([]string{"varchar"})}, "time": {sql.NewMigrationSpec([]string{"varchar"})}, diff --git a/materialize-mysql/.snapshots/TestValidateAndApplyMigrations b/materialize-mysql/.snapshots/TestValidateAndApplyMigrations index 4e23f60672..927fa1cf19 100644 --- a/materialize-mysql/.snapshots/TestValidateAndApplyMigrations +++ b/materialize-mysql/.snapshots/TestValidateAndApplyMigrations @@ -71,7 +71,7 @@ Migratable Changes Applied Schema: {"Name":"datetimeValue","Nullable":"YES","Type":"longtext"} {"Name":"flow_document","Nullable":"NO","Type":"json"} {"Name":"flow_published_at","Nullable":"NO","Type":"datetime"} -{"Name":"int64","Nullable":"YES","Type":"bigint"} +{"Name":"int64","Nullable":"YES","Type":"decimal"} {"Name":"intWidenedToJson","Nullable":"YES","Type":"json"} {"Name":"key","Nullable":"NO","Type":"varchar"} {"Name":"multiple","Nullable":"YES","Type":"json"} @@ -85,6 +85,6 @@ Migratable Changes Applied Schema: Migratable Changes Applied Data: -key (VARCHAR), _meta/flow_truncated (TINYINT), flow_published_at (DATETIME), int64 (BIGINT), multiple (JSON), nonScalarValue (JSON), optional (JSON), scalarValue (TEXT), flow_document (JSON), boolWidenedToJson (JSON), dateValue (TEXT), datetimeValue (TEXT), intWidenedToJson (JSON), numericString (TEXT), requiredNumeric (TEXT), stringWidenedToJson (JSON), timeValue (TEXT) -1, 0, 2024-09-13 01:01:01.000000, 1, , , , test, {}, 1, 2024-01-01, 2024-01-01T01:01:01.111111Z, 999, 123, 456, "hello", 01:01:01.000000 +key (VARCHAR), _meta/flow_truncated (TINYINT), flow_published_at (DATETIME), multiple (JSON), nonScalarValue (JSON), optional (JSON), scalarValue (TEXT), flow_document (JSON), boolWidenedToJson (JSON), dateValue (TEXT), datetimeValue (TEXT), int64 (DECIMAL), intWidenedToJson (JSON), numericString (TEXT), requiredNumeric (TEXT), stringWidenedToJson (JSON), timeValue (TEXT) +1, 0, 2024-09-13 01:01:01.000000, , , , test, {}, 1, 2024-01-01, 2024-01-01T01:01:01.111111Z, 1, 999, 123, 456, "hello", 01:01:01.000000 diff --git a/materialize-mysql/sqlgen.go b/materialize-mysql/sqlgen.go index 82979a5eaa..316338ef09 100644 --- a/materialize-mysql/sqlgen.go +++ b/materialize-mysql/sqlgen.go @@ -95,7 +95,7 @@ var mysqlDialect = func(tzLocation *time.Location, database string, product stri var migrationSpecs = sql.MigrationSpecs{ "decimal": {sql.NewMigrationSpec([]string{"varchar", "longtext"}, nocast)}, - "bigint": {sql.NewMigrationSpec([]string{"varchar", "longtext"}, nocast)}, + "bigint": {sql.NewMigrationSpec([]string{"numeric(65,0)", "varchar", "longtext"}, nocast)}, "double": {sql.NewMigrationSpec([]string{"varchar", "longtext"}, nocast)}, "date": {sql.NewMigrationSpec([]string{"varchar", "longtext"}, nocast)}, "time": {sql.NewMigrationSpec([]string{"varchar", "longtext"}, nocast)}, diff --git a/materialize-postgres/.snapshots/TestValidateAndApply 
b/materialize-postgres/.snapshots/TestValidateAndApply index c75116bd26..e6faa56465 100644 --- a/materialize-postgres/.snapshots/TestValidateAndApply +++ b/materialize-postgres/.snapshots/TestValidateAndApply @@ -82,7 +82,7 @@ Big Schema Changed Types Constraints: {"Field":"boolField","Type":6,"TypeString":"UNSATISFIABLE","Reason":"Field 'boolField' is already being materialized as endpoint type 'BOOLEAN' but endpoint type 'BIGINT' is required by its schema '{ type: [integer] }'"} {"Field":"flow_document","Type":1,"TypeString":"FIELD_REQUIRED","Reason":"This field is the document in the current materialization"} {"Field":"flow_published_at","Type":3,"TypeString":"LOCATION_RECOMMENDED","Reason":"This location is part of the current materialization"} -{"Field":"intField","Type":6,"TypeString":"UNSATISFIABLE","Reason":"Field 'intField' is already being materialized as endpoint type 'BIGINT' but endpoint type 'TEXT' is required by its schema '{ type: [string] }'"} +{"Field":"intField","Type":3,"TypeString":"LOCATION_RECOMMENDED","Reason":"This location is part of the current materialization"} {"Field":"key","Type":1,"TypeString":"FIELD_REQUIRED","Reason":"This field is a key in the current materialization"} {"Field":"multipleField","Type":3,"TypeString":"LOCATION_RECOMMENDED","Reason":"This location is part of the current materialization"} {"Field":"nullField","Type":4,"TypeString":"FIELD_OPTIONAL","Reason":"Object fields may be materialized"} diff --git a/materialize-postgres/.snapshots/TestValidateAndApplyMigrations b/materialize-postgres/.snapshots/TestValidateAndApplyMigrations index d3f68e6be5..f0ba3ca581 100644 --- a/materialize-postgres/.snapshots/TestValidateAndApplyMigrations +++ b/materialize-postgres/.snapshots/TestValidateAndApplyMigrations @@ -71,7 +71,7 @@ Migratable Changes Applied Schema: {"Name":"datetimeValue","Nullable":"YES","Type":"text"} {"Name":"flow_document","Nullable":"NO","Type":"json"} {"Name":"flow_published_at","Nullable":"NO","Type":"timestamp with time zone"} -{"Name":"int64","Nullable":"YES","Type":"bigint"} +{"Name":"int64","Nullable":"YES","Type":"numeric"} {"Name":"intWidenedToJson","Nullable":"YES","Type":"json"} {"Name":"key","Nullable":"NO","Type":"text"} {"Name":"multiple","Nullable":"YES","Type":"json"} @@ -85,6 +85,6 @@ Migratable Changes Applied Schema: Migratable Changes Applied Data: -key (TEXT), _meta/flow_truncated (BOOL), flow_published_at (TIMESTAMPTZ), int64 (INT8), multiple (JSON), nonScalarValue (JSON), optional (JSON), scalarValue (TEXT), flow_document (JSON), boolWidenedToJson (JSON), dateValue (TEXT), datetimeValue (TEXT), intWidenedToJson (JSON), numericString (TEXT), requiredNumeric (TEXT), stringWidenedToJson (JSON), timeValue (TEXT) -1, false, 2024-09-13T01:01:01Z, 1, , , , test, {}, true, 2024-01-01, 2024-01-01T01:01:01.111111Z, 999, 123, 456, "hello", 01:01:01 +key (TEXT), _meta/flow_truncated (BOOL), flow_published_at (TIMESTAMPTZ), multiple (JSON), nonScalarValue (JSON), optional (JSON), scalarValue (TEXT), flow_document (JSON), boolWidenedToJson (JSON), dateValue (TEXT), datetimeValue (TEXT), int64 (NUMERIC), intWidenedToJson (JSON), numericString (TEXT), requiredNumeric (TEXT), stringWidenedToJson (JSON), timeValue (TEXT) +1, false, 2024-09-13T01:01:01Z, , , , test, {}, true, 2024-01-01, 2024-01-01T01:01:01.111111Z, 1, 999, 123, 456, "hello", 01:01:01 diff --git a/materialize-postgres/sqlgen.go b/materialize-postgres/sqlgen.go index 7b03c08958..6292231a67 100644 --- a/materialize-postgres/sqlgen.go +++ 
b/materialize-postgres/sqlgen.go @@ -56,7 +56,7 @@ var pgDialect = func() sql.Dialect { return sql.Dialect{ MigratableTypes: sql.MigrationSpecs{ "numeric": {sql.NewMigrationSpec([]string{"text"})}, - "integer": {sql.NewMigrationSpec([]string{"text"})}, + "bigint": {sql.NewMigrationSpec([]string{"numeric", "text"})}, "double precision": {sql.NewMigrationSpec([]string{"text"})}, "date": {sql.NewMigrationSpec([]string{"text"})}, "time without time zone": {sql.NewMigrationSpec([]string{"text"})}, diff --git a/materialize-redshift/.snapshots/TestValidateAndApplyMigrations b/materialize-redshift/.snapshots/TestValidateAndApplyMigrations index 5934a3f784..442ac42ec2 100644 --- a/materialize-redshift/.snapshots/TestValidateAndApplyMigrations +++ b/materialize-redshift/.snapshots/TestValidateAndApplyMigrations @@ -71,7 +71,7 @@ Migratable Changes Applied Schema: {"Name":"datevalue","Nullable":"YES","Type":"character varying"} {"Name":"flow_document","Nullable":"YES","Type":"super"} {"Name":"flow_published_at","Nullable":"YES","Type":"timestamp with time zone"} -{"Name":"int64","Nullable":"YES","Type":"bigint"} +{"Name":"int64","Nullable":"YES","Type":"numeric"} {"Name":"intwidenedtojson","Nullable":"YES","Type":"super"} {"Name":"key","Nullable":"YES","Type":"character varying"} {"Name":"multiple","Nullable":"YES","Type":"super"} @@ -85,6 +85,6 @@ Migratable Changes Applied Schema: Migratable Changes Applied Data: -key (VARCHAR), _meta/flow_truncated (BOOL), boolwidenedtojson (TIMESTAMPTZ), datevalue (INT8), datetimevalue (4000), flow_published_at (4000), int64 (4000), intwidenedtojson (VARCHAR), multiple (VARCHAR), nonscalarvalue (4000), numericstring (4000), optional (VARCHAR), requirednumeric (VARCHAR), scalarvalue (4000), stringwidenedtojson (VARCHAR), timevalue (VARCHAR), flow_document (4000) -1, false, 2024-09-13T01:01:01Z, 1, , , , test, 01:01:01, "{}", true, 2024-01-01, 2024-01-01T01:01:01.111111Z, 999, 123, 456, "hello" +key (VARCHAR), _meta/flow_truncated (BOOL), boolwidenedtojson (TIMESTAMPTZ), datevalue (4000), datetimevalue (4000), flow_published_at (4000), int64 (VARCHAR), intwidenedtojson (VARCHAR), multiple (4000), nonscalarvalue (4000), numericstring (VARCHAR), optional (VARCHAR), requirednumeric (NUMERIC), scalarvalue (4000), stringwidenedtojson (VARCHAR), timevalue (VARCHAR), flow_document (4000) +1, false, 2024-09-13T01:01:01Z, , , , test, 01:01:01, "{}", true, 2024-01-01, 2024-01-01T01:01:01.111111Z, 1, 999, 123, 456, "hello" diff --git a/materialize-redshift/sqlgen.go b/materialize-redshift/sqlgen.go index dc9c7aecc1..6d646e20c6 100644 --- a/materialize-redshift/sqlgen.go +++ b/materialize-redshift/sqlgen.go @@ -99,7 +99,7 @@ var rsDialect = func(caseSensitiveIdentifierEnabled bool) sql.Dialect { return sql.Dialect{ MigratableTypes: sql.MigrationSpecs{ "numeric": {sql.NewMigrationSpec([]string{"text"})}, - "bigint": {sql.NewMigrationSpec([]string{"text"})}, + "bigint": {sql.NewMigrationSpec([]string{"numeric(38,0)", "text"})}, "double precision": {sql.NewMigrationSpec([]string{"text"})}, "date": {sql.NewMigrationSpec([]string{"text"})}, "time without time zone": {sql.NewMigrationSpec([]string{"text"})}, diff --git a/materialize-s3-csv/.snapshots/TestSpec b/materialize-s3-csv/.snapshots/TestSpec index 51ff2a4486..71039529b2 100644 --- a/materialize-s3-csv/.snapshots/TestSpec +++ b/materialize-s3-csv/.snapshots/TestSpec @@ -61,18 +61,6 @@ }, "csvConfig": { "properties": { - "delimiter": { - "type": "string", - "title": "Delimiter", - "description": "Character to separate columns 
within a row. Defaults to a comma if blank. Must be a single character with a byte length of 1.", - "order": 0 - }, - "nullString": { - "type": "string", - "title": "Null String", - "description": "String to use to represent NULL values. Defaults to an empty string if blank.", - "order": 1 - }, "skipHeaders": { "type": "boolean", "title": "Skip Headers", diff --git a/materialize-snowflake/sqlgen_test.go b/materialize-snowflake/sqlgen_test.go index e8b78ff1e5..6ccc2f4658 100644 --- a/materialize-snowflake/sqlgen_test.go +++ b/materialize-snowflake/sqlgen_test.go @@ -42,17 +42,17 @@ func TestSQLGeneration(t *testing.T) { bounds := []sql.MergeBound{ { - Identifier: tbl.Keys[0].Identifier, + Column: tbl.Keys[0], LiteralLower: testDialect.Literal(int64(10)), LiteralUpper: testDialect.Literal(int64(100)), }, { - Identifier: tbl.Keys[1].Identifier, + Column: tbl.Keys[1], // No bounds - as would be the case for a boolean key, which // would be a very weird key, but technically allowed. }, { - Identifier: tbl.Keys[2].Identifier, + Column: tbl.Keys[2], LiteralLower: testDialect.Literal("aGVsbG8K"), LiteralUpper: testDialect.Literal("Z29vZGJ5ZQo="), }, diff --git a/materialize-sql/templating.go b/materialize-sql/templating.go index 19956d618d..1a57ce12c2 100644 --- a/materialize-sql/templating.go +++ b/materialize-sql/templating.go @@ -49,9 +49,7 @@ func RenderTableTemplate(table Table, tpl *template.Template) (string, error) { // store, and providing the range hints in the merge query directly may allow // for warehouses to do additional optimizations when executing the merge. type MergeBound struct { - // Identifier is the identifier for the key column this bound applies to, - // with the dialect's quoting applied. - Identifier string + Column // LiteralLower will be an empty string if no condition should be used, // which is the case for boolean keys. LiteralLower string @@ -142,23 +140,21 @@ func (b *MergeBoundsBuilder) Build() []MergeBound { conditions := make([]MergeBound, len(b.lower)) for idx, col := range b.keyColumns { - conditions[idx] = MergeBound{ - Identifier: col.Identifier, - } - - lower := b.lower[idx] - upper := b.upper[idx] + conditions[idx] = MergeBound{Column: col} - if _, ok := lower.(bool); ok { + ft, _ := col.AsFlatType() + if ft == BOOLEAN { // Boolean keys cannot reasonably support bounds for merge queries. - // It is assumed that if the lower value is a boolean type then the - // upper value must be as well since we do not allow keys with - // multiple types in SQL materializations. + continue + } else if ft == BINARY { + // Binary keys could in principal be used as merge query bounds, but + // the complexity and overhead of comparing their binary values is + // probably not worth it. continue } - conditions[idx].LiteralLower = b.literaler(lower) - conditions[idx].LiteralUpper = b.literaler(upper) + conditions[idx].LiteralLower = b.literaler(b.lower[idx]) + conditions[idx].LiteralUpper = b.literaler(b.upper[idx]) } // Reset for tracking the next transaction. 
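The templating.go hunk above is the heart of the merge-bounds change: MergeBound now embeds the whole Column rather than just its quoted Identifier, and Build() decides whether to emit bounds from the column's inferred flat type instead of type-sniffing the lower value. Below is a minimal, self-contained sketch of that control flow; the type names (flatType, column, mergeBound) are illustrative stand-ins, not the connector's real sql.Column or AsFlatType API:

```go
package main

import "fmt"

type flatType int

const (
	stringType flatType = iota
	integerType
	booleanType
	binaryType
)

// column stands in for the connector's sql.Column, which carries the
// projection's inferred type alongside the quoted identifier.
type column struct {
	identifier string
	ft         flatType
}

// mergeBound mirrors the reworked MergeBound: the full column plus
// optional literal lower/upper bounds.
type mergeBound struct {
	col          column
	literalLower string
	literalUpper string
}

// buildBounds mirrors MergeBoundsBuilder.Build: every key column gets an
// entry, but only orderable types receive literal bounds. Boolean keys
// cannot usefully bound a merge query, and comparing binary values is
// judged not worth the overhead, so both are skipped.
func buildBounds(cols []column, lower, upper []any, literaler func(any) string) []mergeBound {
	bounds := make([]mergeBound, len(cols))
	for idx, col := range cols {
		bounds[idx] = mergeBound{col: col}
		if col.ft == booleanType || col.ft == binaryType {
			continue // leave LiteralLower/LiteralUpper empty
		}
		bounds[idx].literalLower = literaler(lower[idx])
		bounds[idx].literalUpper = literaler(upper[idx])
	}
	return bounds
}

func main() {
	literaler := func(v any) string { return fmt.Sprintf("'%v'", v) }
	cols := []column{
		{identifier: "colA", ft: stringType},
		{identifier: "colB", ft: booleanType},
		{identifier: "colC", ft: integerType},
	}
	lower := []any{"a", true, int64(1)}
	upper := []any{"c", false, int64(3)}
	for _, b := range buildBounds(cols, lower, upper, literaler) {
		fmt.Printf("%s: [%s, %s]\n", b.col.identifier, b.literalLower, b.literalUpper)
	}
	// Output:
	// colA: ['a', 'c']
	// colB: [, ]
	// colC: ['1', '3']
}
```

The test changes that follow exercise exactly this: colB carries a boolean inference in its projection and so gets empty bounds, while colA and colC get the min/max literals observed across the transaction's keys.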
diff --git a/materialize-sql/templating_test.go b/materialize-sql/templating_test.go
index f9ca88d43f..0a9243952f 100644
--- a/materialize-sql/templating_test.go
+++ b/materialize-sql/templating_test.go
@@ -6,6 +6,7 @@ import (
 	"testing"
 
 	"github.com/bradleyjkemp/cupaloy"
+	pf "github.com/estuary/flow/go/protocols/flow"
 	"github.com/stretchr/testify/assert"
 	"github.com/stretchr/testify/require"
 )
@@ -90,6 +91,14 @@ func TestTableTemplate(t *testing.T) {
 func TestMergeBoundsBuilder(t *testing.T) {
 	literaler := ToLiteralFn(QuoteTransform("'", "''"))
 
+	colA := Column{Identifier: "colA"}
+	colB := Column{Identifier: "colB", Projection: Projection{Projection: pf.Projection{
+		Inference: pf.Inference{
+			Types: []string{"boolean"},
+		},
+	}}}
+	colC := Column{Identifier: "colC"}
+
 	for _, tt := range []struct {
 		name       string
 		keyColumns []Column
@@ -97,49 +106,43 @@ func TestMergeBoundsBuilder(t *testing.T) {
 		want       []MergeBound
 	}{
 		{
-			name: "single key",
-			keyColumns: []Column{
-				{Identifier: "colA"},
-			},
+			name:       "single key",
+			keyColumns: []Column{colA},
 			keys: [][]any{
 				{"a"},
 				{"b"},
 				{"c"},
 			},
 			want: []MergeBound{
-				{"colA", literaler("a"), literaler("c")},
+				{colA, literaler("a"), literaler("c")},
 			},
 		},
 		{
-			name: "multiple keys ordered",
-			keyColumns: []Column{
-				{Identifier: "colA"}, {Identifier: "colB"}, {Identifier: "colC"},
-			},
+			name:       "multiple keys ordered",
+			keyColumns: []Column{colA, colB, colC},
 			keys: [][]any{
 				{"a", true, int64(1)},
 				{"b", false, int64(2)},
 				{"c", true, int64(3)},
 			},
 			want: []MergeBound{
-				{"colA", literaler("a"), literaler("c")},
-				{"colB", "", ""},
-				{"colC", literaler(int64(1)), literaler(int64(3))},
+				{colA, literaler("a"), literaler("c")},
+				{colB, "", ""},
+				{colC, literaler(int64(1)), literaler(int64(3))},
 			},
 		},
 		{
-			name: "multiple keys unordered",
-			keyColumns: []Column{
-				{Identifier: "colA"}, {Identifier: "colB"}, {Identifier: "colC"},
-			},
+			name:       "multiple keys unordered",
+			keyColumns: []Column{colA, colB, colC},
 			keys: [][]any{
 				{"a", true, int64(3)},
 				{"b", false, int64(1)},
 				{"c", true, int64(2)},
 			},
 			want: []MergeBound{
-				{"colA", literaler("a"), literaler("c")},
-				{"colB", "", ""},
-				{"colC", literaler(int64(1)), literaler(int64(3))},
+				{colA, literaler("a"), literaler("c")},
+				{colB, "", ""},
+				{colC, literaler(int64(1)), literaler(int64(3))},
 			},
 		},
 	} {
diff --git a/materialize-sql/testdata/generated_specs/flow.proto b/materialize-sql/testdata/generated_specs/flow.proto
index 7f364f0c10..89266aebf1 100644
Binary files a/materialize-sql/testdata/generated_specs/flow.proto and b/materialize-sql/testdata/generated_specs/flow.proto differ
diff --git a/materialize-sql/testdata/validate/generated_specs/base.flow.proto b/materialize-sql/testdata/validate/generated_specs/base.flow.proto
index 0694ef1643..c971187925 100644
Binary files a/materialize-sql/testdata/validate/generated_specs/base.flow.proto and b/materialize-sql/testdata/validate/generated_specs/base.flow.proto differ
diff --git a/materialize-sql/testdata/validate/generated_specs/migratable-changes.flow.proto b/materialize-sql/testdata/validate/generated_specs/migratable-changes.flow.proto
index 517c6e0816..db09f2923b 100644
Binary files a/materialize-sql/testdata/validate/generated_specs/migratable-changes.flow.proto and b/materialize-sql/testdata/validate/generated_specs/migratable-changes.flow.proto differ
diff --git a/materialize-sql/testdata/validate/migratable-changes.flow.yaml b/materialize-sql/testdata/validate/migratable-changes.flow.yaml
index fbf7cd389c..874e178d57 100644
--- a/materialize-sql/testdata/validate/migratable-changes.flow.yaml
+++ b/materialize-sql/testdata/validate/migratable-changes.flow.yaml
@@ -13,7 +13,7 @@ collections:
         dateValue: { type: string }
         datetimeValue: { type: string }
         timeValue: { type: string }
-        int64: { type: integer, maximum: 9223372036854775808}
+        int64: { type: integer, maximum: 10000000000000000000}
         requiredNumeric: { type: string }
         stringWidenedToJson: { type: [string, integer, boolean] }
         intWidenedToJson: { type: [string, integer, boolean] }
diff --git a/tests/materialize/materialize-azure-fabric-warehouse/cleanup.sh b/tests/materialize/materialize-azure-fabric-warehouse/cleanup.sh
new file mode 100755
index 0000000000..d262a9ff14
--- /dev/null
+++ b/tests/materialize/materialize-azure-fabric-warehouse/cleanup.sh
@@ -0,0 +1,17 @@
+#!/bin/bash
+
+set -e
+
+function dropTable() {
+  go run ${TEST_DIR}/materialize-azure-fabric-warehouse/fetch-data.go --delete "$1"
+}
+
+dropTable "simple"
+dropTable "duplicate_keys_standard"
+dropTable "duplicate_keys_delta"
+dropTable "multiple_types"
+dropTable "formatted_strings"
+dropTable "deletions"
+dropTable "binary_key"
+
+go run ${TEST_DIR}/materialize-azure-fabric-warehouse/fetch-data.go --delete-checkpoint notable
diff --git a/tests/materialize/materialize-azure-fabric-warehouse/config.yaml b/tests/materialize/materialize-azure-fabric-warehouse/config.yaml
new file mode 100644
index 0000000000..8c7ee36471
--- /dev/null
+++ b/tests/materialize/materialize-azure-fabric-warehouse/config.yaml
@@ -0,0 +1,25 @@
+clientID: 91d36d73-9e3e-4ba1-a468-d7d61702d4a2
+clientSecret_sops: ENC[AES256_GCM,data:aLEZb0pxvTyKztS2vvaNzSYF8wpULA896h4+a3wMjuGUMzTYgoiiAA==,iv:VGkaCJMcs+dzhxWjkt6lFYuFFchUcXG9TvyolosnnsY=,tag:j/n2zMrdpEl90C6TYnqD3Q==,type:str]
+warehouse: whb_warehouse
+schema: estuary_test_schema
+connectionString: jc5wrnsaqm5eplfxoiid6tylbe-hb5yc63i54uuld4ymr7gheq5yi.datawarehouse.fabric.microsoft.com
+storageAccountName: whbfabricstaging
+storageAccountKey_sops: ENC[AES256_GCM,data:XRTcO/kpWDSAq7H7YWdE9SBK9Uo1E0gpSuA3p7sg22VA9mhbXCLP26kyWaVnQm9+IaK9VTi//GFxqVAbWWr0fMWmtkh8Em46oZO8HsFf/7mI19QjghwfSQ==,iv:f8bJnZ1LXnMZMhGYh0JTwFPmXoVJJKwzKUev0OXTA+Q=,tag:LEVJ4P9uqnuJcGmMdBGJmQ==,type:str]
+containerName: tmp
+hardDelete: true
+syncSchedule:
+  syncFrequency: 0s
+sops:
+  kms: []
+  gcp_kms:
+    - resource_id: projects/estuary-theatre/locations/us-central1/keyRings/connector-keyring/cryptoKeys/connector-repository
+      created_at: "2024-02-01T17:08:49Z"
+      enc: CiQAdmEdwu+udGkiZbfnnE8djh5o4P8p4Rm3WeXcN0XpcDF4sewSSQCVvC1zSqNDZIN7J2EPjuvJM4ILtloOm9w/OGsE4YwB2qH+sKsDDBYuIDDXtHml6w4wG0iksPY1Yxty0casvU6v02mfM3QOvyk=
+  azure_kv: []
+  hc_vault: []
+  age: []
+  lastmodified: "2025-01-23T16:43:43Z"
+  mac: ENC[AES256_GCM,data:51LJ2l8bnb26p7GyoHvpsadwGRuNsFrMvvukYamOsWeRi78A/znYmZd40kFnTn7zQydC2EyBzB141Iz5VVPKEmuvM40WTYUby2rdGj5GTaYJvBHsvwITkBW1h+IaDkwU+T7vEb9BiIZnNeFEwbHzBw2wJidnHXPNnxpMgjf93f4=,iv:64mvPbohXVQugJ1qXOtrY9YfPwnFACyF2MzUPLQXxH4=,tag:2wMVeOxkgArRYFY7Ji34SA==,type:str]
+  pgp: []
+  encrypted_suffix: _sops
+  version: 3.8.1
diff --git a/tests/materialize/materialize-azure-fabric-warehouse/fetch-data.go b/tests/materialize/materialize-azure-fabric-warehouse/fetch-data.go
new file mode 100644
index 0000000000..f8bfa06d78
--- /dev/null
+++ b/tests/materialize/materialize-azure-fabric-warehouse/fetch-data.go
@@ -0,0 +1,114 @@
+package main
+
+import (
+	"database/sql"
+	"encoding/json"
+	"flag"
+	"fmt"
+	"log"
+	"os"
+	"time"
+
+	"github.com/microsoft/go-mssqldb/azuread"
+)
+
+var deleteTable = flag.Bool("delete", false, "delete the table instead of dumping its contents")
+var deleteCheckpoint = flag.Bool("delete-checkpoint", false, "remove the stored materialization checkpoint")
+
+func main() {
+	flag.Parse()
+
+	tables := flag.Args()
+	if len(tables) != 1 {
+		log.Fatal("must provide table name as an argument")
+	}
+
+	connectionString, ok := os.LookupEnv("FABRIC_WAREHOUSE_CONNECTION_STRING")
+	if !ok {
+		log.Fatal("missing FABRIC_WAREHOUSE_CONNECTION_STRING environment variable")
+	}
+
+	clientID, ok := os.LookupEnv("FABRIC_WAREHOUSE_CLIENT_ID")
+	if !ok {
+		log.Fatal("missing FABRIC_WAREHOUSE_CLIENT_ID environment variable")
+	}
+
+	clientSecret, ok := os.LookupEnv("FABRIC_WAREHOUSE_CLIENT_SECRET")
+	if !ok {
+		log.Fatal("missing FABRIC_WAREHOUSE_CLIENT_SECRET environment variable")
+	}
+
+	warehouse, ok := os.LookupEnv("FABRIC_WAREHOUSE_WAREHOUSE")
+	if !ok {
+		log.Fatal("missing FABRIC_WAREHOUSE_WAREHOUSE environment variable")
+	}
+
+	schema, ok := os.LookupEnv("FABRIC_WAREHOUSE_SCHEMA")
+	if !ok {
+		log.Fatal("missing FABRIC_WAREHOUSE_SCHEMA environment variable")
+	}
+
+	db, err := sql.Open(
+		azuread.DriverName,
+		fmt.Sprintf(
+			"server=%s;user id=%s;password=%s;port=%d;database=%s;fedauth=ActiveDirectoryServicePrincipal",
+			connectionString, clientID, clientSecret, 1433, warehouse,
+		))
+	if err != nil {
+		log.Fatal(fmt.Errorf("connecting to db: %w", err))
+	}
+	defer db.Close()
+
+	if *deleteTable {
+		if _, err := db.Exec(fmt.Sprintf(`DROP TABLE %q.%q`, schema, tables[0])); err != nil {
+			fmt.Println(fmt.Errorf("could not drop table %s: %w", tables[0], err))
+		}
+		os.Exit(0)
+	} else if *deleteCheckpoint {
+		if _, err := db.Exec(fmt.Sprintf("delete from %s.flow_checkpoints_v1 where materialization='tests/materialize-azure-fabric-warehouse/materialize'", schema)); err != nil {
+			fmt.Println(fmt.Errorf("could not delete checkpoint: %w", err))
+		}
+		os.Exit(0)
+	}
+
+	rows, err := db.Query(fmt.Sprintf(`SELECT * FROM %q.%q ORDER BY id`, schema, tables[0]))
+	if err != nil {
+		log.Fatal(fmt.Errorf("running query: %w", err))
+	}
+	defer rows.Close()
+
+	cols, err := rows.Columns()
+	if err != nil {
+		log.Fatal(fmt.Errorf("reading columns: %w", err))
+	}
+
+	data := make([]interface{}, len(cols))
+	ptrs := make([]interface{}, len(cols))
+	for i := range data {
+		ptrs[i] = &data[i]
+	}
+
+	queriedRows := []map[string]any{}
+
+	for rows.Next() {
+		if err = rows.Scan(ptrs...); err != nil {
+			log.Fatal(fmt.Errorf("scanning row: %w", err))
+		}
+		row := make(map[string]any)
+		for idx := range data {
+			d := data[idx]
+			if t, ok := d.(time.Time); ok {
+				// Go JSON encoding apparently doesn't like timestamps with years 9999.
+				d = t.UTC().String()
+			}
+			row[cols[idx]] = d
+		}
+
+		queriedRows = append(queriedRows, row)
+	}
+	rows.Close()
+
+	if err := json.NewEncoder(os.Stdout).Encode(queriedRows); err != nil {
+		log.Fatal(fmt.Errorf("writing output: %w", err))
+	}
+}
diff --git a/tests/materialize/materialize-azure-fabric-warehouse/fetch.sh b/tests/materialize/materialize-azure-fabric-warehouse/fetch.sh
new file mode 100755
index 0000000000..5c50c91311
--- /dev/null
+++ b/tests/materialize/materialize-azure-fabric-warehouse/fetch.sh
@@ -0,0 +1,17 @@
+#!/bin/bash
+
+set -o errexit
+set -o pipefail
+set -o nounset
+
+function exportToJsonl() {
+  go run "${TEST_DIR}"/materialize-azure-fabric-warehouse/fetch-data.go "$1" | jq "{ "_table": \"$1\", rows: . }"
+}
+
+exportToJsonl "simple"
+exportToJsonl "duplicate_keys_standard"
+exportToJsonl "duplicate_keys_delta"
+exportToJsonl "multiple_types"
+exportToJsonl "formatted_strings"
+exportToJsonl "deletions"
+exportToJsonl "binary_key"
diff --git a/tests/materialize/materialize-azure-fabric-warehouse/setup.sh b/tests/materialize/materialize-azure-fabric-warehouse/setup.sh
new file mode 100755
index 0000000000..25714a5a37
--- /dev/null
+++ b/tests/materialize/materialize-azure-fabric-warehouse/setup.sh
@@ -0,0 +1,72 @@
+#!/bin/bash
+
+set -o errexit
+set -o pipefail
+set -o nounset
+
+resources_json_template='[
+  {
+    "resource": {
+      "table": "simple"
+    },
+    "source": "${TEST_COLLECTION_SIMPLE}"
+  },
+  {
+    "resource": {
+      "table": "duplicate_keys_standard"
+    },
+    "source": "${TEST_COLLECTION_DUPLICATED_KEYS}"
+  },
+  {
+    "resource": {
+      "table": "duplicate_keys_delta",
+      "delta_updates": true
+    },
+    "source": "${TEST_COLLECTION_DUPLICATED_KEYS}"
+  },
+  {
+    "resource": {
+      "table": "multiple_types"
+    },
+    "source": "${TEST_COLLECTION_MULTIPLE_DATATYPES}",
+    "fields": {
+      "recommended": true,
+      "exclude": ["nested/id"],
+      "include": {
+        "nested": {},
+        "array_int": {},
+        "multiple": {}
+      }
+    }
+  },
+  {
+    "resource": {
+      "table": "formatted_strings"
+    },
+    "source": "${TEST_COLLECTION_FORMATTED_STRINGS}",
+    "fields": {
+      "recommended": true
+    }
+  },
+  {
+    "resource": {
+      "table": "deletions"
+    },
+    "source": "${TEST_COLLECTION_DELETIONS}"
+  },
+  {
+    "resource": {
+      "table": "binary_key"
+    },
+    "source": "${TEST_COLLECTION_BINARY_KEY}"
+  }
+]'
+
+export CONNECTOR_CONFIG="$(decrypt_config ${TEST_DIR}/${CONNECTOR}/config.yaml)"
+export FABRIC_WAREHOUSE_CLIENT_ID="$(echo $CONNECTOR_CONFIG | jq -r .clientID)"
+export FABRIC_WAREHOUSE_CLIENT_SECRET="$(echo $CONNECTOR_CONFIG | jq -r .clientSecret)"
+export FABRIC_WAREHOUSE_CONNECTION_STRING="$(echo $CONNECTOR_CONFIG | jq -r .connectionString)"
+export FABRIC_WAREHOUSE_WAREHOUSE="$(echo $CONNECTOR_CONFIG | jq -r .warehouse)"
+export FABRIC_WAREHOUSE_SCHEMA="$(echo $CONNECTOR_CONFIG | jq -r .schema)"
+
+export RESOURCES_CONFIG="$(echo "$resources_json_template" | envsubst | jq -c)"
diff --git a/tests/materialize/materialize-azure-fabric-warehouse/snapshot.json b/tests/materialize/materialize-azure-fabric-warehouse/snapshot.json
new file mode 100644
index 0000000000..8e272419fd
--- /dev/null
+++ b/tests/materialize/materialize-azure-fabric-warehouse/snapshot.json
@@ -0,0 +1,472 @@
+[
+  "applied.actionDescription",
+  "\nCREATE TABLE whb_warehouse.estuary_test_schema.simple (\n\tid BIGINT,\n\tcanary VARCHAR(MAX),\n\tflow_published_at DATETIME2(6),\n\tflow_document VARCHAR(MAX)\n);\n\n\nCREATE TABLE whb_warehouse.estuary_test_schema.duplicate_keys_standard (\n\tid BIGINT,\n\tflow_published_at DATETIME2(6),\n\t\"int\" BIGINT,\n\tstr VARCHAR(MAX),\n\tflow_document VARCHAR(MAX)\n);\n\n\nCREATE TABLE whb_warehouse.estuary_test_schema.duplicate_keys_delta (\n\tid BIGINT,\n\tflow_published_at DATETIME2(6),\n\t\"int\" BIGINT,\n\tstr VARCHAR(MAX),\n\tflow_document VARCHAR(MAX)\n);\n\n\nCREATE TABLE whb_warehouse.estuary_test_schema.multiple_types (\n\tid BIGINT,\n\tarray_int VARCHAR(MAX),\n\tbinary_field VARBINARY(MAX),\n\tbool_field BIT,\n\tfloat_field FLOAT,\n\tflow_published_at DATETIME2(6),\n\tmultiple VARCHAR(MAX),\n\tnested VARCHAR(MAX),\n\tnullable_int BIGINT,\n\tstr_field VARCHAR(MAX),\n\tflow_document VARCHAR(MAX)\n);\n\n\nCREATE TABLE whb_warehouse.estuary_test_schema.formatted_strings (\n\tid BIGINT,\n\t\"date\" DATE,\n\tdatetime DATETIME2(6),\n\tflow_published_at DATETIME2(6),\n\tint_and_str DECIMAL(38,0),\n\tint_str DECIMAL(38,0),\n\tnum_and_str FLOAT,\n\tnum_str FLOAT,\n\t\"time\" TIME(6),\n\tflow_document VARCHAR(MAX)\n);\n\n\nCREATE TABLE whb_warehouse.estuary_test_schema.deletions (\n\tid BIGINT,\n\t\"_meta/op\" VARCHAR(MAX),\n\tflow_published_at DATETIME2(6),\n\tflow_document VARCHAR(MAX)\n);\n\n\nCREATE TABLE whb_warehouse.estuary_test_schema.binary_key (\n\tid VARBINARY(MAX),\n\tcounter BIGINT,\n\tflow_published_at DATETIME2(6),\n\tflow_document VARCHAR(MAX)\n);\n"
+]
+[
+  "connectorState",
+  {}
+]
+[
+  "connectorState",
+  {}
+]
+[
+  "connectorState",
+  {
+    "updated": {}
+  }
+]
+{
+  "_table": "simple",
+  "rows": [
+    {
+      "canary": "amputation's",
+      "flow_document": "{\"_meta\":{\"uuid\":\"13814000-1dd2-11b2-8000-071353030311\"},\"canary\":\"amputation's\",\"id\":1}",
+      "flow_published_at": "1970-01-01 00:00:00 +0000 UTC",
+      "id": 1
+    },
+    {
+      "canary": "armament's",
+      "flow_document": "{\"_meta\":{\"uuid\":\"1419d680-1dd2-11b2-8000-071353030311\"},\"canary\":\"armament's\",\"id\":2}",
+      "flow_published_at": "1970-01-01 00:00:01 +0000 UTC",
+      "id": 2
+    },
+    {
+      "canary": "splatters",
+      "flow_document": "{\"_meta\":{\"uuid\":\"14b26d00-1dd2-11b2-8000-071353030311\"},\"canary\":\"splatters\",\"id\":3}",
+      "flow_published_at": "1970-01-01 00:00:02 +0000 UTC",
+      "id": 3
+    },
+    {
+      "canary": "strengthen",
+      "flow_document": "{\"_meta\":{\"uuid\":\"154b0380-1dd2-11b2-8000-071353030311\"},\"canary\":\"strengthen\",\"id\":4}",
+      "flow_published_at": "1970-01-01 00:00:03 +0000 UTC",
+      "id": 4
+    },
+    {
+      "canary": "Kringle's",
+      "flow_document": "{\"_meta\":{\"uuid\":\"15e39a00-1dd2-11b2-8000-071353030311\"},\"canary\":\"Kringle's\",\"id\":5}",
+      "flow_published_at": "1970-01-01 00:00:04 +0000 UTC",
+      "id": 5
+    },
+    {
+      "canary": "grosbeak's",
+      "flow_document": "{\"_meta\":{\"uuid\":\"167c3080-1dd2-11b2-8000-071353030311\"},\"canary\":\"grosbeak's\",\"id\":6}",
+      "flow_published_at": "1970-01-01 00:00:05 +0000 UTC",
+      "id": 6
+    },
+    {
+      "canary": "pieced",
+      "flow_document": "{\"_meta\":{\"uuid\":\"7545a800-1dda-11b2-8000-071353030311\"},\"canary\":\"pieced\",\"id\":7}",
+      "flow_published_at": "1970-01-01 01:00:00 +0000 UTC",
+      "id": 7
+    },
+    {
+      "canary": "roaches",
+      "flow_document": "{\"_meta\":{\"uuid\":\"75de3e80-1dda-11b2-8000-071353030311\"},\"canary\":\"roaches\",\"id\":8}",
+      "flow_published_at": "1970-01-01 01:00:01 +0000 UTC",
+      "id": 8
+    },
+    {
+      "canary": "devilish",
+      "flow_document": "{\"_meta\":{\"uuid\":\"7676d500-1dda-11b2-8000-071353030311\"},\"canary\":\"devilish\",\"id\":9}",
+      "flow_published_at": "1970-01-01 01:00:02 +0000 UTC",
+      "id": 9
+    },
+    {
+      "canary": "glucose's",
+      "flow_document": "{\"_meta\":{\"uuid\":\"770f6b80-1dda-11b2-8000-071353030311\"},\"canary\":\"glucose's\",\"id\":10}",
+      "flow_published_at": "1970-01-01 01:00:03 +0000 UTC",
+      "id": 10
+    }
+  ]
+}
+{
+  "_table": "duplicate_keys_standard",
+  "rows": [
+    {
+      "flow_document": "{\"_meta\":{\"uuid\":\"77a80200-1dda-11b2-8000-071353030311\"},\"id\":1,\"int\":7,\"str\":\"str 6\"}",
+      "flow_published_at": "1970-01-01 01:00:04 +0000 UTC",
+      "id": 1,
+      "int": 7,
+      "str": "str 6"
+    },
+    {
+      "flow_document": "{\"_meta\":{\"uuid\":\"78409880-1dda-11b2-8000-071353030311\"},\"id\":2,\"int\":9,\"str\":\"str 7\"}",
+      "flow_published_at": "1970-01-01 01:00:05 +0000 UTC",
+      "id": 2,
+      "int": 9,
+      "str": "str 7"
+    },
+    {
+      "flow_document": "{\"_meta\":{\"uuid\":\"78d92f00-1dda-11b2-8000-071353030311\"},\"id\":3,\"int\":11,\"str\":\"str 8\"}",
+      "flow_published_at": "1970-01-01 01:00:06 +0000 UTC",
+      "id": 3,
+      "int": 11,
+      "str": "str 8"
+    },
+    {
+      "flow_document": "{\"_meta\":{\"uuid\":\"7971c580-1dda-11b2-8000-071353030311\"},\"id\":4,\"int\":13,\"str\":\"str 9\"}",
+      "flow_published_at": "1970-01-01 01:00:07 +0000 UTC",
+      "id": 4,
+      "int": 13,
+      "str": "str 9"
+    },
+    {
+      "flow_document": "{\"_meta\":{\"uuid\":\"7a0a5c00-1dda-11b2-8000-071353030311\"},\"id\":5,\"int\":15,\"str\":\"str 10\"}",
+      "flow_published_at": "1970-01-01 01:00:08 +0000 UTC",
+      "id": 5,
+      "int": 15,
+      "str": "str 10"
+    }
+  ]
+}
+{
+  "_table": "duplicate_keys_delta",
+  "rows": [
+    {
+      "flow_document": "{\"_meta\":{\"uuid\":\"77a80200-1dda-11b2-8000-071353030311\"},\"id\":1,\"int\":6,\"str\":\"str 6\"}",
+      "flow_published_at": "1970-01-01 01:00:04 +0000 UTC",
+      "id": 1,
+      "int": 6,
+      "str": "str 6"
+    },
+    {
+      "flow_document": "{\"_meta\":{\"uuid\":\"1714c700-1dd2-11b2-8000-071353030311\"},\"id\":1,\"int\":1,\"str\":\"str 1\"}",
+      "flow_published_at": "1970-01-01 00:00:06 +0000 UTC",
+      "id": 1,
+      "int": 1,
+      "str": "str 1"
+    },
+    {
+      "flow_document": "{\"_meta\":{\"uuid\":\"78409880-1dda-11b2-8000-071353030311\"},\"id\":2,\"int\":7,\"str\":\"str 7\"}",
+      "flow_published_at": "1970-01-01 01:00:05 +0000 UTC",
+      "id": 2,
+      "int": 7,
+      "str": "str 7"
+    },
+    {
+      "flow_document": "{\"_meta\":{\"uuid\":\"17ad5d80-1dd2-11b2-8000-071353030311\"},\"id\":2,\"int\":2,\"str\":\"str 2\"}",
+      "flow_published_at": "1970-01-01 00:00:07 +0000 UTC",
+      "id": 2,
+      "int": 2,
+      "str": "str 2"
+    },
+    {
+      "flow_document": "{\"_meta\":{\"uuid\":\"78d92f00-1dda-11b2-8000-071353030311\"},\"id\":3,\"int\":8,\"str\":\"str 8\"}",
+      "flow_published_at": "1970-01-01 01:00:06 +0000 UTC",
+      "id": 3,
+      "int": 8,
+      "str": "str 8"
+    },
+    {
+      "flow_document": "{\"_meta\":{\"uuid\":\"1845f400-1dd2-11b2-8000-071353030311\"},\"id\":3,\"int\":3,\"str\":\"str 3\"}",
+      "flow_published_at": "1970-01-01 00:00:08 +0000 UTC",
+      "id": 3,
+      "int": 3,
+      "str": "str 3"
+    },
+    {
+      "flow_document": "{\"_meta\":{\"uuid\":\"7971c580-1dda-11b2-8000-071353030311\"},\"id\":4,\"int\":9,\"str\":\"str 9\"}",
+      "flow_published_at": "1970-01-01 01:00:07 +0000 UTC",
+      "id": 4,
+      "int": 9,
+      "str": "str 9"
+    },
+    {
+      "flow_document": "{\"_meta\":{\"uuid\":\"18de8a80-1dd2-11b2-8000-071353030311\"},\"id\":4,\"int\":4,\"str\":\"str 4\"}",
+      "flow_published_at": "1970-01-01 00:00:09 +0000 UTC",
+      "id": 4,
+      "int": 4,
+      "str": "str 4"
+    },
+    {
+      "flow_document": "{\"_meta\":{\"uuid\":\"7a0a5c00-1dda-11b2-8000-071353030311\"},\"id\":5,\"int\":10,\"str\":\"str 10\"}",
+      "flow_published_at": "1970-01-01 01:00:08 +0000 UTC",
+      "id": 5,
+      "int": 10,
+      "str": "str 10"
+    },
+    {
+      "flow_document": "{\"_meta\":{\"uuid\":\"19772100-1dd2-11b2-8000-071353030311\"},\"id\":5,\"int\":5,\"str\":\"str 5\"}",
+      "flow_published_at": "1970-01-01 00:00:10 +0000 UTC",
+      "id": 5,
+      "int": 5,
+      "str": "str 5"
+    }
+  ]
+}
+{
+  "_table": "multiple_types",
+  "rows": [
+    {
+      "array_int": "[11,12]",
+      "binary_field": null,
+      "bool_field": false,
+      "float_field": 1.1,
+      "flow_document": "{\"_meta\":{\"uuid\":\"1b40e480-1dd2-11b2-8000-071353030311\"},\"array_int\":[11,12],\"bool_field\":false,\"float_field\":1.1,\"id\":1,\"multiple\":1,\"nested\":{\"id\":\"i1\"},\"nullable_int\":null,\"str_field\":\"str1\"}",
+      "flow_published_at": "1970-01-01 00:00:13 +0000 UTC",
+      "id": 1,
+      "multiple": "1",
+      "nested": "{\"id\":\"i1\"}",
+      "nullable_int": null,
+      "str_field": "str1"
+    },
+    {
+      "array_int": "[21,22]",
+      "binary_field": null,
+      "bool_field": true,
+      "float_field": 2.2,
+      "flow_document": "{\"_meta\":{\"uuid\":\"1bd97b00-1dd2-11b2-8000-071353030311\"},\"array_int\":[21,22],\"bool_field\":true,\"float_field\":2.2,\"id\":2,\"multiple\":2.2,\"nested\":{\"id\":\"i2\"},\"nullable_int\":2,\"str_field\":\"str2\"}",
+      "flow_published_at": "1970-01-01 00:00:14 +0000 UTC",
+      "id": 2,
+      "multiple": "2.2",
+      "nested": "{\"id\":\"i2\"}",
+      "nullable_int": 2,
+      "str_field": "str2"
+    },
+    {
+      "array_int": "[31,32]",
+      "binary_field": null,
+      "bool_field": false,
+      "float_field": 3.3,
+      "flow_document": "{\"_meta\":{\"uuid\":\"1c721180-1dd2-11b2-8000-071353030311\"},\"array_int\":[31,32],\"bool_field\":false,\"float_field\":3.3,\"id\":3,\"multiple\":true,\"nested\":{\"id\":\"i3\"},\"nullable_int\":null,\"str_field\":\"str3\"}",
+      "flow_published_at": "1970-01-01 00:00:15 +0000 UTC",
+      "id": 3,
+      "multiple": "true",
+      "nested": "{\"id\":\"i3\"}",
+      "nullable_int": null,
+      "str_field": "str3"
+    },
+    {
+      "array_int": "[41,42]",
+      "binary_field": null,
+      "bool_field": true,
+      "float_field": 4.4,
+      "flow_document": "{\"_meta\":{\"uuid\":\"1d0aa800-1dd2-11b2-8000-071353030311\"},\"array_int\":[41,42],\"bool_field\":true,\"float_field\":4.4,\"id\":4,\"multiple\":false,\"nested\":{\"id\":\"i4\"},\"nullable_int\":4,\"str_field\":\"str4\"}",
+      "flow_published_at": "1970-01-01 00:00:16 +0000 UTC",
+      "id": 4,
+      "multiple": "false",
+      "nested": "{\"id\":\"i4\"}",
+      "nullable_int": 4,
+      "str_field": "str4"
+    },
+    {
+      "array_int": "[51,52]",
+      "binary_field": null,
+      "bool_field": false,
+      "float_field": 5.5,
+      "flow_document": "{\"_meta\":{\"uuid\":\"1da33e80-1dd2-11b2-8000-071353030311\"},\"array_int\":[51,52],\"bool_field\":false,\"float_field\":5.5,\"id\":5,\"multiple\":\"string five\",\"nested\":{\"id\":\"i5\"},\"nullable_int\":null,\"str_field\":\"str5\"}",
+      "flow_published_at": "1970-01-01 00:00:17 +0000 UTC",
+      "id": 5,
+      "multiple": "\"string five\"",
+      "nested": "{\"id\":\"i5\"}",
+      "nullable_int": null,
+      "str_field": "str5"
+    },
+    {
+      "array_int": "[61,62]",
+      "binary_field": null,
+      "bool_field": true,
+      "float_field": 66.66,
+      "flow_document": "{\"_meta\":{\"uuid\":\"8098d380-1dda-11b2-8000-071353030311\"},\"array_int\":[61,62],\"bool_field\":true,\"float_field\":66.66,\"id\":6,\"multiple\":[\"one\",2,true],\"nested\":{\"id\":\"i6\"},\"nullable_int\":6,\"str_field\":\"str6 v2\"}",
+      "flow_published_at": "1970-01-01 01:00:19 +0000 UTC",
+      "id": 6,
+      "multiple": "[\"one\",2,true]",
+      "nested": "{\"id\":\"i6\"}",
+      "nullable_int": 6,
+      "str_field": "str6 v2"
+    },
+    {
+      "array_int": "[71,72]",
+      "binary_field": null,
+      "bool_field": false,
+      "float_field": 77.77,
+      "flow_document": "{\"_meta\":{\"uuid\":\"81316a00-1dda-11b2-8000-071353030311\"},\"array_int\":[71,72],\"bool_field\":false,\"float_field\":77.77,\"id\":7,\"multiple\":{\"object\":\"seven\"},\"nested\":{\"id\":\"i7\"},\"nullable_int\":null,\"str_field\":\"str7 v2\"}",
+      "flow_published_at": "1970-01-01 01:00:20 +0000 UTC",
+      "id": 7,
+      "multiple": "{\"object\":\"seven\"}",
+      "nested": "{\"id\":\"i7\"}",
+      "nullable_int": null,
+      "str_field": "str7 v2"
+    },
+    {
+      "array_int": "[81,82]",
+      "binary_field": null,
+      "bool_field": true,
+      "float_field": 88.88,
+      "flow_document": "{\"_meta\":{\"uuid\":\"81ca0080-1dda-11b2-8000-071353030311\"},\"array_int\":[81,82],\"bool_field\":true,\"float_field\":88.88,\"id\":8,\"multiple\":null,\"nested\":{\"id\":\"i8\"},\"nullable_int\":8,\"str_field\":\"str8 v2\"}",
+      "flow_published_at": "1970-01-01 01:00:21 +0000 UTC",
+      "id": 8,
+      "multiple": null,
+      "nested": "{\"id\":\"i8\"}",
+      "nullable_int": 8,
+      "str_field": "str8 v2"
+    },
+    {
+      "array_int": "[91,92]",
+      "binary_field": "YWxvaGEK",
+      "bool_field": false,
+      "float_field": 99.99,
+      "flow_document": "{\"_meta\":{\"uuid\":\"82629700-1dda-11b2-8000-071353030311\"},\"array_int\":[91,92],\"binary_field\":\"YWxvaGEK\",\"bool_field\":false,\"float_field\":99.99,\"id\":9,\"nested\":{\"id\":\"i9\"},\"nullable_int\":null,\"str_field\":\"str9 v2\"}",
+      "flow_published_at": "1970-01-01 01:00:22 +0000 UTC",
+      "id": 9,
+      "multiple": null,
+      "nested": "{\"id\":\"i9\"}",
+      "nullable_int": null,
+      "str_field": "str9 v2"
+    },
+    {
+      "array_int": "[1,2]",
+      "binary_field": "c2F5xY1uYXJhCg==",
+      "bool_field": true,
+      "float_field": 1010.101,
+      "flow_document": "{\"_meta\":{\"uuid\":\"82fb2d80-1dda-11b2-8000-071353030311\"},\"array_int\":[1,2],\"binary_field\":\"c2F5xY1uYXJhCg==\",\"bool_field\":true,\"float_field\":1010.101,\"id\":10,\"nested\":{\"id\":\"i10\"},\"nullable_int\":10,\"str_field\":\"str10 v2\"}",
+      "flow_published_at": "1970-01-01 01:00:23 +0000 UTC",
+      "id": 10,
+      "multiple": null,
+      "nested": "{\"id\":\"i10\"}",
+      "nullable_int": 10,
+      "str_field": "str10 v2"
+    }
+  ]
+}
+{
+  "_table": "formatted_strings",
+  "rows": [
+    {
+      "date": "0001-01-01 00:00:00 +0000 UTC",
+      "datetime": "0001-01-01 00:00:00 +0000 UTC",
+      "flow_document": "{\"_meta\":{\"uuid\":\"7d054c80-1dda-11b2-8000-071353030311\"},\"date\":\"0000-01-01\",\"datetime\":\"0000-01-01T00:00:00Z\",\"id\":1,\"int_and_str\":1,\"int_str\":\"10\",\"num_and_str\":1.1,\"num_str\":\"10.1\",\"time\":\"00:00:00Z\"}",
+      "flow_published_at": "1970-01-01 01:00:13 +0000 UTC",
+      "id": 1,
+      "int_and_str": "MQ==",
+      "int_str": "MTA=",
+      "num_and_str": 1.1,
+      "num_str": 10.1,
+      "time": "0001-01-01 00:00:00 +0000 UTC"
+    },
+    {
+      "date": "1999-02-02 00:00:00 +0000 UTC",
+      "datetime": "1999-02-02 14:20:12.33 +0000 UTC",
+      "flow_document": "{\"_meta\":{\"uuid\":\"7d9de300-1dda-11b2-8000-071353030311\"},\"date\":\"1999-02-02\",\"datetime\":\"1999-02-02T14:20:12.33Z\",\"id\":2,\"int_and_str\":2,\"int_str\":\"20\",\"num_and_str\":2.1,\"num_str\":\"20.1\",\"time\":\"14:20:12.33Z\"}",
+      "flow_published_at": "1970-01-01 01:00:14 +0000 UTC",
+      "id": 2,
+      "int_and_str": "Mg==",
+      "int_str": "MjA=",
+      "num_and_str": 2.1,
+      "num_str": 20.1,
+      "time": "0001-01-01 14:20:12.33 +0000 UTC"
+    },
+    {
+      "date": "1000-03-03 00:00:00 +0000 UTC",
+      "datetime": "1000-03-03 23:59:38.1 +0000 UTC",
+      "flow_document": "{\"_meta\":{\"uuid\":\"1a0fb780-1dd2-11b2-8000-071353030311\"},\"date\":\"1000-03-03\",\"datetime\":\"1000-03-03T23:59:38.10Z\",\"id\":3,\"int_and_str\":3,\"int_str\":\"30\",\"num_and_str\":3.1,\"num_str\":\"30.1\",\"time\":\"23:59:38.10Z\"}",
+      "flow_published_at": "1970-01-01 00:00:11 +0000 UTC",
+      "id": 3,
+      "int_and_str": "Mw==",
+      "int_str": "MzA=",
+      "num_and_str": 3.1,
+      "num_str": 30.1,
+      "time": "0001-01-01 23:59:38.1 +0000 UTC"
+    },
+    {
+      "date": "2023-08-29 00:00:00 +0000 UTC",
+      "datetime": "2023-08-29 23:59:38 +0000 UTC",
+      "flow_document": "{\"_meta\":{\"uuid\":\"1aa84e00-1dd2-11b2-8000-071353030311\"},\"date\":\"2023-08-29\",\"datetime\":\"2023-08-29T23:59:38Z\",\"id\":4,\"int_and_str\":\"4\",\"int_str\":\"40\",\"num_and_str\":\"4.1\",\"num_str\":\"40.1\",\"time\":\"23:59:38Z\"}",
+      "flow_published_at": "1970-01-01 00:00:12 +0000 UTC",
+      "id": 4,
+      "int_and_str": "NA==",
+      "int_str": "NDA=",
+      "num_and_str": 4.1,
+      "num_str": 40.1,
+      "time": "0001-01-01 23:59:38 +0000 UTC"
+    },
+    {
+      "date": "9999-12-31 00:00:00 +0000 UTC",
+      "datetime": "9999-12-31 23:59:59 +0000 UTC",
+      "flow_document": "{\"_meta\":{\"uuid\":\"7e367980-1dda-11b2-8000-071353030311\"},\"date\":\"9999-12-31\",\"datetime\":\"9999-12-31T23:59:59Z\",\"id\":5,\"int_and_str\":\"5\",\"int_str\":\"50\",\"num_and_str\":\"5.1\",\"num_str\":\"50.1\",\"time\":\"23:59:59Z\"}",
+      "flow_published_at": "1970-01-01 01:00:15 +0000 UTC",
+      "id": 5,
+      "int_and_str": "NQ==",
+      "int_str": "NTA=",
+      "num_and_str": 5.1,
+      "num_str": 50.1,
+      "time": "0001-01-01 23:59:59 +0000 UTC"
+    },
+    {
+      "date": null,
+      "datetime": null,
+      "flow_document": "{\"_meta\":{\"uuid\":\"7ecf1000-1dda-11b2-8000-071353030311\"},\"id\":8,\"num_str\":\"NaN\"}",
+      "flow_published_at": "1970-01-01 01:00:16 +0000 UTC",
+      "id": 8,
+      "int_and_str": null,
+      "int_str": null,
+      "num_and_str": null,
+      "num_str": null,
+      "time": null
+    },
+    {
+      "date": null,
+      "datetime": null,
+      "flow_document": "{\"_meta\":{\"uuid\":\"7f67a680-1dda-11b2-8000-071353030311\"},\"id\":9,\"num_str\":\"Infinity\"}",
+      "flow_published_at": "1970-01-01 01:00:17 +0000 UTC",
+      "id": 9,
+      "int_and_str": null,
+      "int_str": null,
+      "num_and_str": null,
+      "num_str": null,
+      "time": null
+    },
+    {
+      "date": null,
+      "datetime": null,
+      "flow_document": "{\"_meta\":{\"uuid\":\"80003d00-1dda-11b2-8000-071353030311\"},\"id\":10,\"num_str\":\"-Infinity\"}",
+      "flow_published_at": "1970-01-01 01:00:18 +0000 UTC",
+      "id": 10,
+      "int_and_str": null,
+      "int_str": null,
+      "num_and_str": null,
+      "num_str": null,
+      "time": null
+    }
+  ]
+}
+{
+  "_table": "deletions",
+  "rows": [
+    {
+      "_meta/op": "u",
+      "flow_document": "{\"_meta\":{\"op\":\"u\",\"uuid\":\"84c4f100-1dda-11b2-8000-071353030311\"},\"id\":2}",
+      "flow_published_at": "1970-01-01 01:00:26 +0000 UTC",
+      "id": 2
+    },
+    {
+      "_meta/op": "c",
+      "flow_document": "{\"_meta\":{\"op\":\"c\",\"uuid\":\"855d8780-1dda-11b2-8000-071353030311\"},\"id\":3}",
+      "flow_published_at": "1970-01-01 01:00:27 +0000 UTC",
+      "id": 3
+    }
+  ]
+}
+{
+  "_table": "binary_key",
+  "rows": [
+    {
+      "counter": 1,
+      "flow_document": "{\"_meta\":{\"uuid\":\"87274b00-1dda-11b2-8000-071353030311\"},\"counter\":1,\"id\":\"c2F5xY1uYXJhCg==\"}",
+      "flow_published_at": "1970-01-01 01:00:30 +0000 UTC",
+      "id": "c2F5xY1uYXJhCg=="
+    }
+  ]
+}
+[
+  "applied.actionDescription",
+  ""
+]
+[
+  "connectorState",
+  {
+    "updated": {}
+  }
+]