diff --git a/go/flow/builds.go b/go/flow/builds.go index eb983bd546..dd2778423d 100644 --- a/go/flow/builds.go +++ b/go/flow/builds.go @@ -5,9 +5,6 @@ import ( "database/sql" "fmt" "io" - "io/ioutil" - "net" - "net/http" "net/url" "os" "runtime" @@ -23,7 +20,7 @@ import ( type BuildService struct { baseURL *url.URL // URL to which buildIDs are joined. builds map[string]*sharedBuild // All active builds. - gsClient *storage.Client // Google storage client which is initalized on first use. + gsClient *storage.Client // Google storage client which is initialized on first use. mu sync.Mutex } @@ -43,11 +40,6 @@ type sharedBuild struct { dbTempfile *os.File dbErr error dbOnce sync.Once - - tsWorker *JSWorker - tsClient *http.Client - tsErr error - tsOnce sync.Once } // NewBuildService returns a new *BuildService. @@ -116,14 +108,6 @@ func (b *Build) Extract(fn func(*sql.DB) error) error { return fn(b.db) } -// TypeScriptLocalSocket returns the TypeScript Unix Domain Socket of this Catalog. -// If a TypeScript worker isn't running, one is started -// and will be stopped on a future call to Build.Close(). -func (b *Build) TypeScriptClient() (*http.Client, error) { - b.tsOnce.Do(func() { _ = b.initTypeScript() }) - return b.tsClient, b.tsErr -} - // Close the Build. If this is the last remaining reference, // then all allocated resources are cleaned up. func (b *Build) Close() error { @@ -184,46 +168,6 @@ func (b *Build) dbInit() (err error) { return nil } -func (b *Build) initTypeScript() (err error) { - defer func() { b.tsErr = err }() - - var npmPackage []byte - if err = b.Extract(func(d *sql.DB) error { - npmPackage, err = catalog.LoadNPMPackage(b.db) - return err - }); err != nil { - return fmt.Errorf("loading NPM package: %w", err) - } - - tsWorker, err := NewJSWorker(npmPackage) - if err != nil { - return fmt.Errorf("starting worker: %w", err) - } - b.tsWorker = tsWorker - - // HTTP/S client which dials the TypeScript server over the loopback - // for both cleartext and (fake) HTTPS connections. - // The latter is a requirement for HTTP/2 support over unix domain sockets. - // See also: https://www.mailgun.com/blog/http-2-cleartext-h2c-client-example-go/ - b.tsClient = &http.Client{ - Transport: &http.Transport{ - DialContext: func(_ context.Context, _, _ string) (net.Conn, error) { - return net.Dial("unix", tsWorker.socketPath) - }, - DialTLSContext: func(_ context.Context, _, _ string) (net.Conn, error) { - return net.Dial("unix", tsWorker.socketPath) - }, - // Compression isn't desired over a local UDS transport. - DisableCompression: true, - // MaxConnsPerHost is the maximum concurrency with which - // we'll drive the lambda server. - MaxConnsPerHost: 8, - }, - } - - return nil -} - func (b *sharedBuild) destroy() error { if b.db == nil { // Nothing to close. @@ -239,12 +183,6 @@ func (b *sharedBuild) destroy() error { return fmt.Errorf("removing DB tempfile: %w", err) } - if b.tsWorker == nil { - // Nothing to stop. 
- } else if err := b.tsWorker.Stop(); err != nil { - return fmt.Errorf("stopping typescript worker: %w", err) - } - return nil } @@ -273,7 +211,7 @@ func fetchResource(svc *BuildService, resource *url.URL) (path string, tempfile } defer r.Close() - if tempfile, err = ioutil.TempFile("", "build"); err != nil { + if tempfile, err = os.CreateTemp("", "build"); err != nil { return "", nil, err } if _, err = io.Copy(tempfile, r); err != nil { diff --git a/go/flow/builds_test.go b/go/flow/builds_test.go index df0b4e9b1e..fe6af1bd07 100644 --- a/go/flow/builds_test.go +++ b/go/flow/builds_test.go @@ -4,6 +4,7 @@ import ( "context" "database/sql" "net/url" + "path" "runtime" "testing" @@ -54,18 +55,19 @@ func TestBuildReferenceCounting(t *testing.T) { } func TestBuildLazyInitAndReuse(t *testing.T) { + var dir = t.TempDir() var args = bindings.BuildArgs{ Context: context.Background(), FileRoot: "./testdata", BuildAPI_Config: pf.BuildAPI_Config{ BuildId: "a-build-id", - Directory: t.TempDir(), + BuildDb: path.Join(dir, "a-build-id"), Source: "file:///specs_test.flow.yaml", SourceType: pf.ContentType_CATALOG, }} require.NoError(t, bindings.BuildCatalog(args)) - var builds, err = NewBuildService("file://" + args.Directory + "/") + var builds, err = NewBuildService("file://" + dir + "/") require.NoError(t, err) // Open. Expect DB is not initialized until first use. @@ -80,7 +82,7 @@ func TestBuildLazyInitAndReuse(t *testing.T) { collection, err = catalog.LoadCollection(db, "example/collection") return err })) - require.Equal(t, "example/collection", collection.Collection.String()) + require.Equal(t, "example/collection", collection.Name.String()) // Database was initialized. var db1 = b1.db @@ -95,12 +97,6 @@ func TestBuildLazyInitAndReuse(t *testing.T) { return nil })) - // Our fixture doesn't build a typescript package, so initialization - // fails with an error. Expect the error is shared. - _, err = b1.TypeScriptClient() - require.Error(t, err) - require.Equal(t, err, b2.tsErr) - // Close both builds, dropping the reference count to zero. 
require.NoError(t, b1.Close()) require.NoError(t, b2.Close()) @@ -114,7 +110,7 @@ func TestBuildLazyInitAndReuse(t *testing.T) { collection, err = catalog.LoadCollection(db, "example/collection") return err })) - require.Equal(t, "example/collection", collection.Collection.String()) + require.Equal(t, "example/collection", collection.Name.String()) } func TestInitOfMissingBuild(t *testing.T) { diff --git a/go/flow/converge.go b/go/flow/converge.go index 9d079cb2dd..0f9f740f23 100644 --- a/go/flow/converge.go +++ b/go/flow/converge.go @@ -8,6 +8,7 @@ import ( "github.com/estuary/flow/go/labels" pf "github.com/estuary/flow/go/protocols/flow" + "github.com/estuary/flow/go/protocols/ops" "go.gazette.dev/core/broker/client" pb "go.gazette.dev/core/broker/protocol" "go.gazette.dev/core/consumer" @@ -21,7 +22,7 @@ func ListShardsRequest(task pf.Task) pc.ListRequest { Selector: pb.LabelSelector{ Include: pb.MustLabelSet( labels.TaskName, task.TaskName(), - labels.TaskType, taskType(task), + labels.TaskType, taskType(task).String(), ), }, } @@ -34,7 +35,7 @@ func ListRecoveryLogsRequest(task pf.Task) pb.ListRequest { Include: pb.MustLabelSet( glabels.ContentType, glabels.ContentType_RecoveryLog, labels.TaskName, task.TaskName(), - labels.TaskType, taskType(task), + labels.TaskType, taskType(task).String(), ), }, } @@ -44,7 +45,7 @@ func ListRecoveryLogsRequest(task pf.Task) pb.ListRequest { func ListPartitionsRequest(collection *pf.CollectionSpec) pb.ListRequest { return pb.ListRequest{ Selector: pf.LabelSelector{ - Include: pb.MustLabelSet(labels.Collection, collection.Collection.String()), + Include: pb.MustLabelSet(labels.Collection, collection.Name.String()), }, } } @@ -356,14 +357,14 @@ func ActivationChanges( for _, collection := range collections { var resp, err = client.ListAllJournals(ctx, jc, ListPartitionsRequest(collection)) if err != nil { - return nil, nil, fmt.Errorf("listing partitions of %s: %w", collection.Collection, err) + return nil, nil, fmt.Errorf("listing partitions of %s: %w", collection.Name, err) } var desired = MapPartitionsToCurrentSplits(resp.Journals) journals, err = CollectionChanges(collection, resp.Journals, desired, journals) if err != nil { - return nil, nil, fmt.Errorf("processing collection %s: %w", collection.Collection, err) + return nil, nil, fmt.Errorf("processing collection %s: %w", collection.Name, err) } } @@ -411,7 +412,7 @@ func DeletionChanges( for _, collection := range collections { var resp, err = client.ListAllJournals(ctx, jc, ListPartitionsRequest(collection)) if err != nil { - return nil, nil, fmt.Errorf("listing partitions of %s: %w", collection.Collection, err) + return nil, nil, fmt.Errorf("listing partitions of %s: %w", collection.Name, err) } for _, cur := range resp.Journals { @@ -452,14 +453,14 @@ func DeletionChanges( } // taskType returns the label matching this Task. 
-func taskType(task pf.Task) string { +func taskType(task pf.Task) ops.TaskType { switch task.(type) { case *pf.CaptureSpec: - return labels.TaskTypeCapture - case *pf.DerivationSpec: - return labels.TaskTypeDerivation + return ops.TaskType_capture + case *pf.CollectionSpec: + return ops.TaskType_derivation case *pf.MaterializationSpec: - return labels.TaskTypeMaterialization + return ops.TaskType_materialization default: panic(task) } diff --git a/go/flow/converge_test.go b/go/flow/converge_test.go index 3c1d021e53..b8f413c1e3 100644 --- a/go/flow/converge_test.go +++ b/go/flow/converge_test.go @@ -4,6 +4,7 @@ import ( "context" "database/sql" "fmt" + "path" "testing" "github.com/bradleyjkemp/cupaloy" @@ -24,20 +25,20 @@ func TestConvergence(t *testing.T) { FileRoot: "./testdata", BuildAPI_Config: pf.BuildAPI_Config{ BuildId: "fixture", - Directory: t.TempDir(), + BuildDb: path.Join(t.TempDir(), "build.db"), Source: "file:///specs_test.flow.yaml", SourceType: pf.ContentType_CATALOG, }} require.NoError(t, bindings.BuildCatalog(args)) var collection *pf.CollectionSpec - var derivation *pf.DerivationSpec + var derivation *pf.CollectionSpec - require.NoError(t, catalog.Extract(args.OutputPath(), func(db *sql.DB) (err error) { + require.NoError(t, catalog.Extract(args.BuildDb, func(db *sql.DB) (err error) { if collection, err = catalog.LoadCollection(db, "example/collection"); err != nil { return err } - derivation, err = catalog.LoadDerivation(db, "example/derivation") + derivation, err = catalog.LoadCollection(db, "example/derivation") return err })) @@ -70,7 +71,7 @@ func TestConvergence(t *testing.T) { ))) require.NoError(t, err) - shardSpec1, err := BuildShardSpec(derivation.ShardTemplate, + shardSpec1, err := BuildShardSpec(derivation.Derivation.ShardTemplate, labels.EncodeRange(pf.RangeSpec{ KeyBegin: 0x10000000, KeyEnd: 0x2fffffff, @@ -79,9 +80,9 @@ func TestConvergence(t *testing.T) { }, pf.LabelSet{}), ) require.NoError(t, err) - logSpec1 := BuildRecoverySpec(derivation.RecoveryLogTemplate, shardSpec1) + logSpec1 := BuildRecoverySpec(derivation.Derivation.RecoveryLogTemplate, shardSpec1) - shardSpec2, err := BuildShardSpec(derivation.ShardTemplate, + shardSpec2, err := BuildShardSpec(derivation.Derivation.ShardTemplate, labels.EncodeRange(pf.RangeSpec{ KeyBegin: 0x30000000, KeyEnd: 0x3fffffff, @@ -90,9 +91,9 @@ func TestConvergence(t *testing.T) { }, pf.LabelSet{}), ) require.NoError(t, err) - logSpec2 := BuildRecoverySpec(derivation.RecoveryLogTemplate, shardSpec2) + logSpec2 := BuildRecoverySpec(derivation.Derivation.RecoveryLogTemplate, shardSpec2) - shardSpec3, err := BuildShardSpec(derivation.ShardTemplate, + shardSpec3, err := BuildShardSpec(derivation.Derivation.ShardTemplate, labels.EncodeRange(pf.RangeSpec{ KeyBegin: 0x30000000, KeyEnd: 0x3fffffff, @@ -101,7 +102,7 @@ func TestConvergence(t *testing.T) { }, pf.LabelSet{}), ) require.NoError(t, err) - logSpec3 := BuildRecoverySpec(derivation.RecoveryLogTemplate, shardSpec3) + logSpec3 := BuildRecoverySpec(derivation.Derivation.RecoveryLogTemplate, shardSpec3) var allPartitions = []pb.ListResponse_Journal{ {Spec: *partitionSpec1, ModRevision: 11}, @@ -162,7 +163,7 @@ func TestConvergence(t *testing.T) { }) t.Run("shard-split-errors", func(t *testing.T) { - var shard, err = BuildShardSpec(derivation.ShardTemplate, + var shard, err = BuildShardSpec(derivation.Derivation.ShardTemplate, labels.EncodeRange(pf.RangeSpec{ KeyEnd: 0x10000000, RClockEnd: 0x10000000, @@ -330,7 +331,7 @@ func TestConvergence(t *testing.T) { var ctx = 
context.Background() var jc = &mockJournals{ collections: map[string]*pb.ListResponse{ - collection.Collection.String(): {Journals: allPartitions}, + collection.Name.String(): {Journals: allPartitions}, }, logs: map[string]*pb.ListResponse{ derivation.TaskName(): {Journals: allLogs}, @@ -361,7 +362,7 @@ func TestConvergence(t *testing.T) { var ctx = context.Background() var jc = &mockJournals{ collections: map[string]*pb.ListResponse{ - collection.Collection.String(): {Journals: allPartitions}, + collection.Name.String(): {Journals: allPartitions}, }, logs: map[string]*pb.ListResponse{ derivation.TaskName(): {Journals: allLogs}, diff --git a/go/flow/mapping.go b/go/flow/mapping.go index 7ddfa533b7..9f48c4f057 100644 --- a/go/flow/mapping.go +++ b/go/flow/mapping.go @@ -73,7 +73,7 @@ func NewMapper( func PartitionPointers(spec *pf.CollectionSpec) []string { var ptrs = make([]string, len(spec.PartitionFields)) for i, field := range spec.PartitionFields { - ptrs[i] = pf.GetProjectionByField(field, spec.Projections).Ptr + ptrs[i] = spec.GetProjection(field).Ptr } return ptrs } @@ -174,7 +174,7 @@ func (m *Mapper) Map(mappable message.Mappable) (pb.Journal, string, error) { "journal": applySpec.Name, "readThrough": readThrough, }).Info("created partition") - createdPartitionsCounters.WithLabelValues(msg.Spec.Collection.String()).Inc() + createdPartitionsCounters.WithLabelValues(msg.Spec.Name.String()).Inc() } m.journals.Mu.RLock() @@ -274,7 +274,7 @@ func (m Mappable) SetUUID(uuid message.UUID) { func NewAcknowledgementMessage(spec *pf.CollectionSpec) Mappable { return Mappable{ Spec: spec, - Doc: append(json.RawMessage(nil), spec.AckJsonTemplate...), + Doc: append(json.RawMessage(nil), spec.AckTemplateJson...), } } diff --git a/go/flow/mapping_test.go b/go/flow/mapping_test.go index ae7c9119c0..08ab5fc46d 100644 --- a/go/flow/mapping_test.go +++ b/go/flow/mapping_test.go @@ -4,6 +4,7 @@ import ( "context" "database/sql" "fmt" + "path" "testing" "time" @@ -198,14 +199,14 @@ func buildCombineFixtures(t *testing.T) []Mappable { FileRoot: "./testdata", BuildAPI_Config: pf.BuildAPI_Config{ BuildId: "fixture", - Directory: t.TempDir(), + BuildDb: path.Join(t.TempDir(), "build.db"), Source: "file:///mapping_test.flow.yaml", SourceType: pf.ContentType_CATALOG, }} require.NoError(t, bindings.BuildCatalog(args)) var spec *pf.CollectionSpec - require.NoError(t, catalog.Extract(args.OutputPath(), func(db *sql.DB) (err error) { + require.NoError(t, catalog.Extract(args.BuildDb, func(db *sql.DB) (err error) { spec, err = catalog.LoadCollection(db, "a/collection") return err })) diff --git a/go/flow/specs_test.go b/go/flow/specs_test.go index bee56a4b6e..fb0d5ee6f2 100644 --- a/go/flow/specs_test.go +++ b/go/flow/specs_test.go @@ -3,6 +3,7 @@ package flow import ( "context" "database/sql" + "path" "testing" "github.com/bradleyjkemp/cupaloy" @@ -21,20 +22,20 @@ func TestBuildingSpecs(t *testing.T) { FileRoot: "./testdata", BuildAPI_Config: pf.BuildAPI_Config{ BuildId: "fixture", - Directory: t.TempDir(), + BuildDb: path.Join(t.TempDir(), "build.db"), Source: "file:///specs_test.flow.yaml", SourceType: pf.ContentType_CATALOG, }} require.NoError(t, bindings.BuildCatalog(args)) var collection *pf.CollectionSpec - var derivation *pf.DerivationSpec + var derivation *pf.CollectionSpec - require.NoError(t, catalog.Extract(args.OutputPath(), func(db *sql.DB) (err error) { + require.NoError(t, catalog.Extract(args.BuildDb, func(db *sql.DB) (err error) { if collection, err = catalog.LoadCollection(db, 
"example/collection"); err != nil { return err } - derivation, err = catalog.LoadDerivation(db, "example/derivation") + derivation, err = catalog.LoadCollection(db, "example/derivation") return err })) @@ -56,17 +57,17 @@ func TestBuildingSpecs(t *testing.T) { RClockEnd: 61514131, }, set) - shard, err := BuildShardSpec(derivation.ShardTemplate, set) + shard, err := BuildShardSpec(derivation.Derivation.ShardTemplate, set) require.NoError(t, err) // Build a derivation shard that's currently splitting from its source. set.AddValue(flowLabels.SplitSource, "something/something") - shardSplitSource, err := BuildShardSpec(derivation.ShardTemplate, set) + shardSplitSource, err := BuildShardSpec(derivation.Derivation.ShardTemplate, set) require.NoError(t, err) // Build a recovery log. - var recovery = BuildRecoverySpec(derivation.RecoveryLogTemplate, shard) + var recovery = BuildRecoverySpec(derivation.Derivation.RecoveryLogTemplate, shard) // Snapshot all specs. cupaloy.SnapshotT(t, @@ -87,14 +88,14 @@ func TestBuildingSpecs(t *testing.T) { require.NoError(t, err) require.Equal(t, partition, partition2) - shard2, err := BuildShardSpec(derivation.ShardTemplate, shard.LabelSet) + shard2, err := BuildShardSpec(derivation.Derivation.ShardTemplate, shard.LabelSet) require.NoError(t, err) require.Equal(t, shard, shard2) - shard2, err = BuildShardSpec(derivation.ShardTemplate, shardSplitSource.LabelSet) + shard2, err = BuildShardSpec(derivation.Derivation.ShardTemplate, shardSplitSource.LabelSet) require.NoError(t, err) require.Equal(t, shardSplitSource, shard2) - var recovery2 = BuildRecoverySpec(derivation.RecoveryLogTemplate, shard2) + var recovery2 = BuildRecoverySpec(derivation.Derivation.RecoveryLogTemplate, shard2) require.Equal(t, recovery, recovery2) } diff --git a/go/flow/testdata/specs_test.flow.yaml b/go/flow/testdata/specs_test.flow.yaml index f506ecfcba..70891ab7ca 100644 --- a/go/flow/testdata/specs_test.flow.yaml +++ b/go/flow/testdata/specs_test.flow.yaml @@ -24,15 +24,20 @@ collections: example/derivation: schema: *schema key: [/a_key] - derivation: - typescript: - module: | - import stuff; - doStuff(); - transform: - fromCollection: + derive: + using: + typescript: + module: | + import { IDerivation, Document, SourceSwizzle } from 'flow/example/derivation.ts'; + + export class Derivation extends IDerivation { + swizzle(source: { doc: SourceSwizzle }): Document[] { + return []; + } + } + transforms: + - name: swizzle source: { name: example/collection } - publish: { lambda: typescript } shards: # Expect this is reflected in shard specs. 
maxTxnDuration: 60s diff --git a/go/flowctl-go/cmd-api-activate.go b/go/flowctl-go/cmd-api-activate.go index 1144f6e32d..97f91b90a3 100644 --- a/go/flowctl-go/cmd-api-activate.go +++ b/go/flowctl-go/cmd-api-activate.go @@ -14,11 +14,11 @@ import ( "github.com/estuary/flow/go/connector" "github.com/estuary/flow/go/flow" "github.com/estuary/flow/go/labels" - "github.com/estuary/flow/go/ops" pfc "github.com/estuary/flow/go/protocols/capture" "github.com/estuary/flow/go/protocols/catalog" pf "github.com/estuary/flow/go/protocols/flow" pm "github.com/estuary/flow/go/protocols/materialize" + "github.com/estuary/flow/go/protocols/ops" log "github.com/sirupsen/logrus" "go.gazette.dev/core/broker/client" pb "go.gazette.dev/core/broker/protocol" @@ -86,39 +86,40 @@ func (cmd apiActivate) execute(ctx context.Context) error { var publisher = ops.NewLocalPublisher(labels.ShardLabeling{ Build: spec.ShardTemplate.LabelSet.ValueOf(labels.Build), TaskName: spec.TaskName(), - TaskType: labels.TaskTypeCapture, + TaskType: ops.TaskType_capture, }) if spec.ShardTemplate.Disable { - log.WithField("capture", spec.Capture.String()). + log.WithField("capture", spec.Name). Info("Will skip applying capture because it's shards are disabled") continue } - var request = &pfc.ApplyRequest{ - Capture: spec, - Version: publisher.Labels().Build, - DryRun: cmd.DryRun, + var request = &pfc.Request{ + Apply: &pfc.Request_Apply{ + Capture: spec, + Version: publisher.Labels().Build, + DryRun: cmd.DryRun, + }, } - var response, err = connector.Invoke( + var response, err = connector.Invoke[pfc.Response]( ctx, request, cmd.Network, publisher, - func(driver *connector.Driver, request *pfc.ApplyRequest) (*pfc.ApplyResponse, error) { - return driver.CaptureClient().ApplyUpsert(ctx, request) + func(driver *connector.Driver) (pfc.Connector_CaptureClient, error) { + return driver.CaptureClient().Capture(ctx) }, ) if err != nil { - return fmt.Errorf("applying capture %q: %w", spec.Capture, err) + return fmt.Errorf("applying capture %q: %w", spec.Name, err) } - if response.ActionDescription != "" { - fmt.Println("Applying capture ", spec.Capture, ":") - fmt.Println(response.ActionDescription) + if response.Applied != nil && response.Applied.ActionDescription != "" { + fmt.Println("Applying capture ", spec.Name, ":") + fmt.Println(response.Applied.ActionDescription) } - log.WithFields(log.Fields{"name": spec.Capture}). - Info("applied capture to endpoint") + log.WithFields(log.Fields{"name": spec.Name}).Info("applied capture to endpoint") } // As with captures, apply materializations before we create or update the @@ -131,39 +132,40 @@ func (cmd apiActivate) execute(ctx context.Context) error { var publisher = ops.NewLocalPublisher(labels.ShardLabeling{ Build: spec.ShardTemplate.LabelSet.ValueOf(labels.Build), TaskName: spec.TaskName(), - TaskType: labels.TaskTypeMaterialization, + TaskType: ops.TaskType_materialization, }) if spec.ShardTemplate.Disable { - log.WithField("materialization", spec.Materialization.String()). + log.WithField("materialization", spec.Name). 
Info("Will skip applying materialization because it's shards are disabled") continue } - var request = &pm.ApplyRequest{ - Materialization: spec, - Version: publisher.Labels().Build, - DryRun: cmd.DryRun, + var request = &pm.Request{ + Apply: &pm.Request_Apply{ + Materialization: spec, + Version: publisher.Labels().Build, + DryRun: cmd.DryRun, + }, } - var response, err = connector.Invoke( + var response, err = connector.Invoke[pm.Response]( ctx, request, cmd.Network, publisher, - func(driver *connector.Driver, request *pm.ApplyRequest) (*pm.ApplyResponse, error) { - return driver.MaterializeClient().ApplyUpsert(ctx, request) + func(driver *connector.Driver) (pm.Connector_MaterializeClient, error) { + return driver.MaterializeClient().Materialize(ctx) }, ) if err != nil { - return fmt.Errorf("applying materialization %q: %w", spec.Materialization, err) + return fmt.Errorf("applying materialization %q: %w", spec.Name, err) } - if response.ActionDescription != "" { - fmt.Println("Applying materialization ", spec.Materialization, ":") - fmt.Println(response.ActionDescription) + if response.Applied != nil && response.Applied.ActionDescription != "" { + fmt.Println("Applying materialization ", spec.Name, ":") + fmt.Println(response.Applied.ActionDescription) } - log.WithFields(log.Fields{"name": spec.Materialization}). - Info("applied materialization to endpoint") + log.WithFields(log.Fields{"name": spec.Name}).Info("applied materialization to endpoint") } shards, journals, err := flow.ActivationChanges(ctx, rjc, sc, collections, tasks, cmd.InitialSplits) @@ -171,7 +173,7 @@ func (cmd apiActivate) execute(ctx context.Context) error { return err } if err = applyAllChanges(ctx, sc, rjc, shards, journals, cmd.DryRun); err == errNoChangesToApply { - log.Warn("there are no changes to apply") + log.Info("there are no changes to apply") } else if err != nil { return err } @@ -364,11 +366,15 @@ func loadFromCatalog(db *sql.DB, names []string, all, allDerivations bool) ([]*p return nil, nil, err } else { for _, c := range loaded { - var name = c.Collection.String() + var name = c.Name.String() var _, ok = idx[name] if ok || all || allDerivations { collections = append(collections, c) idx[name] = idx[name] + 1 + + if c.Derivation != nil { + tasks = append(tasks, c) + } } } } @@ -383,17 +389,6 @@ func loadFromCatalog(db *sql.DB, names []string, all, allDerivations bool) ([]*p } } } - if loaded, err := catalog.LoadAllDerivations(db); err != nil { - return nil, nil, err - } else { - for _, t := range loaded { - var _, ok = idx[t.TaskName()] - if ok || all || allDerivations { - tasks = append(tasks, t) - idx[t.TaskName()] = idx[t.TaskName()] + 1 - } - } - } if loaded, err := catalog.LoadAllMaterializations(db); err != nil { return nil, nil, err } else { diff --git a/go/flowctl-go/cmd-api-await.go b/go/flowctl-go/cmd-api-await.go index 83cea142cd..5894030e32 100644 --- a/go/flowctl-go/cmd-api-await.go +++ b/go/flowctl-go/cmd-api-await.go @@ -48,7 +48,6 @@ func (cmd apiAwait) execute(ctx context.Context) error { // Load collections and tasks. 
var collections []*pf.CollectionSpec var captures []*pf.CaptureSpec - var derivations []*pf.DerivationSpec var materializations []*pf.MaterializationSpec if err := build.Extract(func(db *sql.DB) error { @@ -58,9 +57,6 @@ func (cmd apiAwait) execute(ctx context.Context) error { if captures, err = catalog.LoadAllCaptures(db); err != nil { return err } - if derivations, err = catalog.LoadAllDerivations(db); err != nil { - return err - } if materializations, err = catalog.LoadAllMaterializations(db); err != nil { return err } @@ -75,21 +71,17 @@ func (cmd apiAwait) execute(ctx context.Context) error { if err != nil { return fmt.Errorf("building test driver: %w", err) } - var graph = testing.NewGraph(captures, derivations, materializations) + var graph = testing.NewGraph(captures, collections, materializations) // "Ingest" the capture EOF pseudo-journal to mark // capture tasks as having a pending stat, which is recursively tracked // through derivations and materializations of the catalog. for _, capture := range captures { - if capture.EndpointType == pf.EndpointType_INGEST { - continue // Skip ingestions, which never EOF. - } - graph.CompletedIngest( - pf.Collection(capture.Capture), + pf.Collection(capture.Name), &testing.Clock{ Etcd: *brokerHeader, - Offsets: pb.Offsets{pb.Journal(fmt.Sprintf("%s/eof", capture.Capture)): 1}, + Offsets: pb.Offsets{pb.Journal(fmt.Sprintf("%s/eof", capture.Name)): 1}, }, ) } diff --git a/go/flowctl-go/cmd-api-build.go b/go/flowctl-go/cmd-api-build.go index 028d596bee..80e66db139 100644 --- a/go/flowctl-go/cmd-api-build.go +++ b/go/flowctl-go/cmd-api-build.go @@ -6,7 +6,6 @@ import ( "fmt" "net/url" "path/filepath" - "strings" "time" "github.com/estuary/flow/go/bindings" @@ -20,24 +19,16 @@ import ( type apiBuild struct { BuildID string `long:"build-id" required:"true" description:"ID of this build"` - Directory string `long:"directory" default:"." description:"Build directory"` + BuildDB string `long:"build-db" required:"true" description:"Output build database"` FileRoot string `long:"fs-root" default:"/" description:"Filesystem root of fetched file:// resources"` Network string `long:"network" description:"The Docker network that connector containers are given access to."` Source string `long:"source" required:"true" description:"Catalog source file or URL to build"` SourceType string `long:"source-type" default:"catalog" choice:"catalog" choice:"jsonSchema" description:"Type of the source to build."` - TSCompile bool `long:"ts-compile" description:"Should TypeScript modules be compiled and linted? Implies generation."` - TSGenerate bool `long:"ts-generate" description:"Should TypeScript types be generated?"` - TSPackage bool `long:"ts-package" description:"Should TypeScript modules be packaged? 
Implies generation and compilation."` Log mbp.LogConfig `group:"Logging" namespace:"log" env-namespace:"LOG"` Diagnostics mbp.DiagnosticsConfig `group:"Debug" namespace:"debug" env-namespace:"DEBUG"` } func (cmd apiBuild) execute(ctx context.Context) error { - var err error - if cmd.Directory, err = filepath.Abs(cmd.Directory); err != nil { - return fmt.Errorf("filepath.Abs: %w", err) - } - var sourceType pf.ContentType switch cmd.SourceType { case "catalog": @@ -50,14 +41,10 @@ func (cmd apiBuild) execute(ctx context.Context) error { Context: ctx, BuildAPI_Config: pf.BuildAPI_Config{ BuildId: cmd.BuildID, - Directory: cmd.Directory, + BuildDb: cmd.BuildDB, Source: cmd.Source, SourceType: sourceType, ConnectorNetwork: cmd.Network, - - TypescriptGenerate: cmd.TSGenerate, - TypescriptCompile: cmd.TSCompile, - TypescriptPackage: cmd.TSPackage, }, FileRoot: cmd.FileRoot, } @@ -68,7 +55,7 @@ func (cmd apiBuild) execute(ctx context.Context) error { // We manually open the database, rather than use catalog.Extract, // because we explicitly check for and handle errors. // Essentially all other accesses of the catalog DB should prefer catalog.Extract. - db, err := sql.Open("sqlite3", fmt.Sprintf("file://%s?mode=ro", args.OutputPath())) + var db, err = sql.Open("sqlite3", fmt.Sprintf("file://%s?mode=ro", args.BuildDb)) if err != nil { return fmt.Errorf("opening DB: %w", err) } @@ -80,7 +67,7 @@ func (cmd apiBuild) execute(ctx context.Context) error { } for _, be := range errors { - var path, ptr = scopeToPathAndPtr(args.Directory, be.Scope) + var path, ptr = scopeToPathAndPtr(args.Source, be.Scope) fmt.Println(yellow(path), "error at", red(ptr), ":") fmt.Println(be.Error) } @@ -107,18 +94,34 @@ func (cmd apiBuild) Execute(_ []string) error { return cmd.execute(ctx) } -func scopeToPathAndPtr(dir, scope string) (path, ptr string) { - u, err := url.Parse(scope) +func scopeToPathAndPtr(source, scope string) (path, ptr string) { + sourceURL, err := url.Parse(source) if err != nil { panic(err) } - ptr, u.Fragment = u.Fragment, "" - path = u.String() + // If `source` is relative, attempt to resolve it as an absolute path to a local file. 
+	if !sourceURL.IsAbs() {
+		if abs, err := filepath.Abs(source); err == nil {
+			sourceURL.Scheme = "file"
+			sourceURL.Path = abs
+		}
+	}
+
+	scopeURL, err := url.Parse(scope)
+	if err != nil {
+		panic(err)
+	}
 
-	if u.Scheme == "file" && strings.HasPrefix(u.Path, dir) {
-		path = path[len(dir)+len("file://")+1:]
+	if sourceURL.Scheme == "file" && scopeURL.Scheme == "file" {
+		if rel, err := filepath.Rel(filepath.Dir(sourceURL.Path), scopeURL.Path); err == nil {
+			return rel, scopeURL.Fragment
+		}
 	}
+
+	ptr, scopeURL.Fragment = scopeURL.Fragment, ""
+	path = scopeURL.String()
+
 	if ptr == "" {
 		ptr = "<root>"
 	}
diff --git a/go/flowctl-go/cmd-api-delete.go b/go/flowctl-go/cmd-api-delete.go
index 5151bb9ef0..e19488c4fb 100644
--- a/go/flowctl-go/cmd-api-delete.go
+++ b/go/flowctl-go/cmd-api-delete.go
@@ -8,11 +8,10 @@ import (
 	"github.com/estuary/flow/go/connector"
 	"github.com/estuary/flow/go/flow"
 	"github.com/estuary/flow/go/labels"
-	"github.com/estuary/flow/go/ops"
-	pc "github.com/estuary/flow/go/protocols/capture"
 	pfc "github.com/estuary/flow/go/protocols/capture"
 	pf "github.com/estuary/flow/go/protocols/flow"
 	pm "github.com/estuary/flow/go/protocols/materialize"
+	"github.com/estuary/flow/go/protocols/ops"
 	log "github.com/sirupsen/logrus"
 	pb "go.gazette.dev/core/broker/protocol"
 	mbp "go.gazette.dev/core/mainboilerplate"
@@ -70,7 +69,7 @@ func (cmd apiDelete) execute(ctx context.Context) error {
 		return err
 	}
 	if err = applyAllChanges(ctx, sc, rjc, shards, journals, cmd.DryRun); err == errNoChangesToApply {
-		log.Warn("there are no changes to apply")
+		log.Info("there are no changes to apply")
 	} else if err != nil {
 		return err
 	}
@@ -83,42 +82,46 @@ func (cmd apiDelete) execute(ctx context.Context) error {
 			continue
 		}
 		var publisher = ops.NewLocalPublisher(labels.ShardLabeling{
-			TaskName: spec.TaskName(),
-			TaskType: labels.TaskTypeCapture,
 			Build:    spec.ShardTemplate.LabelSet.ValueOf(labels.Build),
+			TaskName: spec.TaskName(),
+			TaskType: ops.TaskType_capture,
 		})
 
 		if spec.ShardTemplate.Disable {
-			log.WithField("capture", spec.Capture.String()).
-				Info("Will skip un-applying capture because it's disabled")
+			log.WithField("capture", spec.Name.String()).
+				Info("Will skip deleting capture because it's disabled")
 			continue
 		}
 
-		var request = &pc.ApplyRequest{
-			Capture: spec,
-			Version: publisher.Labels().Build,
-			DryRun:  cmd.DryRun,
+		// Communicate a deletion to the connector as a semantic apply of this capture with no bindings.
+		spec.Bindings = nil
+
+		var request = &pfc.Request{
+			Apply: &pfc.Request_Apply{
+				Capture: spec,
+				Version: publisher.Labels().Build,
+				DryRun:  cmd.DryRun,
+			},
 		}
-		var response, err = connector.Invoke(
+		var response, err = connector.Invoke[pfc.Response](
 			ctx,
 			request,
 			cmd.Network,
 			publisher,
-			func(driver *connector.Driver, request *pfc.ApplyRequest) (*pfc.ApplyResponse, error) {
-				return driver.CaptureClient().ApplyDelete(ctx, request)
+			func(driver *connector.Driver) (pfc.Connector_CaptureClient, error) {
+				return driver.CaptureClient().Capture(ctx)
 			},
 		)
 		if err != nil {
-			return fmt.Errorf("deleting capture %q: %w", spec.Capture, err)
+			return fmt.Errorf("deleting capture %q: %w", spec.Name, err)
 		}
 
-		if response.ActionDescription != "" {
-			fmt.Println("Deleting capture ", spec.Capture, ":")
-			fmt.Println(response.ActionDescription)
+		if response.Applied != nil && response.Applied.ActionDescription != "" {
+			fmt.Println("Deleting capture ", spec.Name, ":")
+			fmt.Println(response.Applied.ActionDescription)
 		}
 
-		log.WithFields(log.Fields{"name": spec.Capture}).
- Info("deleted capture from endpoint") + log.WithFields(log.Fields{"name": spec.Name}).Info("deleted capture from endpoint") } // Remove materializations from endpoints, now that we've deleted the @@ -128,42 +131,47 @@ func (cmd apiDelete) execute(ctx context.Context) error { if !ok { continue } - var publisher = ops.NewLocalPublisher(labels.ShardLabeling{ - TaskName: spec.TaskName(), - TaskType: labels.TaskTypeCapture, + var publisher = ops.NewLocalPublisher(ops.ShardLabeling{ Build: spec.ShardTemplate.LabelSet.ValueOf(labels.Build), + TaskName: spec.TaskName(), + TaskType: ops.TaskType_materialization, }) if spec.ShardTemplate.Disable { - log.WithField("materialization", spec.Materialization.String()). - Info("Will skip un-applying materialization because it's disabled") + log.WithField("materialization", spec.Name.String()). + Info("Will skip deleting materialization because it's disabled") continue } - var request = &pm.ApplyRequest{ - Materialization: spec, - Version: publisher.Labels().Build, - DryRun: cmd.DryRun, + // Communicate a deletion to the connector as a semantic apply of this materialization with no bindings. + spec.Bindings = nil + + var request = &pm.Request{ + Apply: &pm.Request_Apply{ + Materialization: spec, + Version: publisher.Labels().Build, + DryRun: cmd.DryRun, + }, } - var response, err = connector.Invoke( + var response, err = connector.Invoke[pm.Response]( ctx, request, cmd.Network, publisher, - func(driver *connector.Driver, request *pm.ApplyRequest) (*pm.ApplyResponse, error) { - return driver.MaterializeClient().ApplyDelete(ctx, request) + func(driver *connector.Driver) (pm.Connector_MaterializeClient, error) { + return driver.MaterializeClient().Materialize(ctx) }, ) if err != nil { - return fmt.Errorf("deleting materialization %q: %w", spec.Materialization, err) + return fmt.Errorf("deleting materialization %q: %w", spec.Name, err) } - if response.ActionDescription != "" { - fmt.Println("Deleting materialization ", spec.Materialization, ":") - fmt.Println(response.ActionDescription) + if response.Applied != nil && response.Applied.ActionDescription != "" { + fmt.Println("Deleting materialization ", spec.Name, ":") + fmt.Println(response.Applied.ActionDescription) } - log.WithFields(log.Fields{"name": spec.Materialization}). + log.WithFields(log.Fields{"name": spec.Name}). 
Info("deleted materialization from endpoint") } diff --git a/go/flowctl-go/cmd-api-discover.go b/go/flowctl-go/cmd-api-discover.go index 78c2861ea6..e52cc62663 100644 --- a/go/flowctl-go/cmd-api-discover.go +++ b/go/flowctl-go/cmd-api-discover.go @@ -9,9 +9,10 @@ import ( "github.com/estuary/flow/go/connector" "github.com/estuary/flow/go/labels" - "github.com/estuary/flow/go/ops" pc "github.com/estuary/flow/go/protocols/capture" + pfc "github.com/estuary/flow/go/protocols/capture" pf "github.com/estuary/flow/go/protocols/flow" + "github.com/estuary/flow/go/protocols/ops" "github.com/gogo/protobuf/jsonpb" "github.com/sirupsen/logrus" pb "go.gazette.dev/core/broker/protocol" @@ -29,7 +30,7 @@ type apiDiscover struct { Output string `long:"output" choice:"json" choice:"proto" default:"json"` } -func (cmd apiDiscover) execute(ctx context.Context) (*pc.DiscoverResponse, error) { +func (cmd apiDiscover) execute(ctx context.Context) (*pc.Response_Discovered, error) { var config, err = readConfig(cmd.Config) if err != nil { return nil, err @@ -49,19 +50,25 @@ func (cmd apiDiscover) execute(ctx context.Context) (*pc.DiscoverResponse, error TaskName: cmd.Name, }) - var request = &pc.DiscoverRequest{ - EndpointType: pf.EndpointType_AIRBYTE_SOURCE, - EndpointSpecJson: spec, + var request = &pc.Request{ + Discover: &pc.Request_Discover{ + ConnectorType: pf.CaptureSpec_IMAGE, + ConfigJson: spec, + }, } - return connector.Invoke( + response, err := connector.Invoke[pfc.Response]( ctx, request, cmd.Network, publisher, - func(driver *connector.Driver, request *pc.DiscoverRequest) (*pc.DiscoverResponse, error) { - return driver.CaptureClient().Discover(ctx, request) + func(driver *connector.Driver) (pfc.Connector_CaptureClient, error) { + return driver.CaptureClient().Capture(ctx) }, ) + if err != nil { + return nil, err + } + return response.Discovered, nil } func (cmd apiDiscover) Execute(_ []string) error { diff --git a/go/flowctl-go/cmd-api-spec.go b/go/flowctl-go/cmd-api-spec.go index 21f8cfbf6e..2a775f65db 100644 --- a/go/flowctl-go/cmd-api-spec.go +++ b/go/flowctl-go/cmd-api-spec.go @@ -13,21 +13,21 @@ import ( "github.com/estuary/flow/go/connector" "github.com/estuary/flow/go/labels" - "github.com/estuary/flow/go/ops" pc "github.com/estuary/flow/go/protocols/capture" - "github.com/estuary/flow/go/protocols/flow" + pf "github.com/estuary/flow/go/protocols/flow" pm "github.com/estuary/flow/go/protocols/materialize" + "github.com/estuary/flow/go/protocols/ops" "github.com/sirupsen/logrus" pb "go.gazette.dev/core/broker/protocol" mbp "go.gazette.dev/core/mainboilerplate" ) type specResponse struct { - Type string `json:"type"` - DocumentationURL string `json:"documentationURL"` - EndpointSpecSchema json.RawMessage `json:"endpointSpecSchema"` - ResourceSpecSchema json.RawMessage `json:"resourceSpecSchema"` - Oauth2Spec json.RawMessage `json:"oauth2Spec"` + Protocol string `json:"protocol"` + DocumentationURL string `json:"documentationUrl"` + ConfigSchema json.RawMessage `json:"configSchema"` + ResourceConfigSchema json.RawMessage `json:"resourceConfigSchema"` + Oauth2 json.RawMessage `json:"oauth2"` } type apiSpec struct { @@ -84,38 +84,42 @@ func (cmd apiSpec) specCapture(ctx context.Context, spec json.RawMessage) (specR var publisher = ops.NewLocalPublisher(labels.ShardLabeling{ TaskName: cmd.Name, }) - var request = &pc.SpecRequest{ - EndpointType: flow.EndpointType_AIRBYTE_SOURCE, - EndpointSpecJson: spec, + var request = &pc.Request{ + Spec: &pc.Request_Spec{ + ConnectorType: pf.CaptureSpec_IMAGE, 
+ ConfigJson: spec, + }, } - var response, err = connector.Invoke( + var response, err = connector.Invoke[pc.Response]( ctx, request, cmd.Network, publisher, - func(driver *connector.Driver, request *pc.SpecRequest) (*pc.SpecResponse, error) { - return driver.CaptureClient().Spec(ctx, request) + func(driver *connector.Driver) (pc.Connector_CaptureClient, error) { + return driver.CaptureClient().Capture(ctx) }, ) if err != nil { return specResponse{}, err + } else if response.Spec == nil { + return specResponse{}, fmt.Errorf("missing Spec response") } var oauth2Spec bytes.Buffer - if response.Oauth2Spec != nil { + if response.Spec.Oauth2 != nil { // Serialize OAuth2Spec using canonical proto JSON - err = (&jsonpb.Marshaler{}).Marshal(&oauth2Spec, response.Oauth2Spec) + err = (&jsonpb.Marshaler{}).Marshal(&oauth2Spec, response.Spec.Oauth2) if err != nil { return specResponse{}, err } } return specResponse{ - Type: "capture", - DocumentationURL: response.DocumentationUrl, - EndpointSpecSchema: response.EndpointSpecSchemaJson, - ResourceSpecSchema: response.ResourceSpecSchemaJson, - Oauth2Spec: oauth2Spec.Bytes(), + Protocol: "capture", + DocumentationURL: response.Spec.DocumentationUrl, + ConfigSchema: response.Spec.ConfigSchemaJson, + ResourceConfigSchema: response.Spec.ResourceConfigSchemaJson, + Oauth2: oauth2Spec.Bytes(), }, nil } @@ -123,38 +127,42 @@ func (cmd apiSpec) specMaterialization(ctx context.Context, spec json.RawMessage var publisher = ops.NewLocalPublisher(labels.ShardLabeling{ TaskName: cmd.Name, }) - var request = &pm.SpecRequest{ - EndpointType: flow.EndpointType_FLOW_SINK, - EndpointSpecJson: spec, + var request = &pm.Request{ + Spec: &pm.Request_Spec{ + ConnectorType: pf.MaterializationSpec_IMAGE, + ConfigJson: spec, + }, } - var response, err = connector.Invoke( + var response, err = connector.Invoke[pm.Response]( ctx, request, cmd.Network, publisher, - func(driver *connector.Driver, request *pm.SpecRequest) (*pm.SpecResponse, error) { - return driver.MaterializeClient().Spec(ctx, request) + func(driver *connector.Driver) (pm.Connector_MaterializeClient, error) { + return driver.MaterializeClient().Materialize(ctx) }, ) if err != nil { return specResponse{}, err + } else if response.Spec == nil { + return specResponse{}, fmt.Errorf("missing Spec response") } var oauth2Spec bytes.Buffer - if response.Oauth2Spec != nil { + if response.Spec.Oauth2 != nil { // Serialize OAuth2Spec using canonical proto JSON - err = (&jsonpb.Marshaler{}).Marshal(&oauth2Spec, response.Oauth2Spec) + err = (&jsonpb.Marshaler{}).Marshal(&oauth2Spec, response.Spec.Oauth2) if err != nil { return specResponse{}, err } } return specResponse{ - Type: "materialization", - DocumentationURL: response.DocumentationUrl, - EndpointSpecSchema: response.EndpointSpecSchemaJson, - ResourceSpecSchema: response.ResourceSpecSchemaJson, - Oauth2Spec: oauth2Spec.Bytes(), + Protocol: "materialization", + DocumentationURL: response.Spec.DocumentationUrl, + ConfigSchema: response.Spec.ConfigSchemaJson, + ResourceConfigSchema: response.Spec.ResourceConfigSchemaJson, + Oauth2: oauth2Spec.Bytes(), }, nil } diff --git a/go/flowctl-go/cmd-api-test.go b/go/flowctl-go/cmd-api-test.go index d9083a75cf..f5f29c3f58 100644 --- a/go/flowctl-go/cmd-api-test.go +++ b/go/flowctl-go/cmd-api-test.go @@ -57,7 +57,6 @@ func (cmd apiTest) execute(ctx context.Context) error { // Identify tests to verify and associated collections & schemas. 
var config pf.BuildAPI_Config var collections []*pf.CollectionSpec - var derivations []*pf.DerivationSpec var tests []*pf.TestSpec if err := build.Extract(func(db *sql.DB) error { @@ -67,9 +66,6 @@ func (cmd apiTest) execute(ctx context.Context) error { if collections, err = catalog.LoadAllCollections(db); err != nil { return err } - if derivations, err = catalog.LoadAllDerivations(db); err != nil { - return err - } if tests, err = catalog.LoadAllTests(db); err != nil { return err } @@ -89,7 +85,7 @@ func (cmd apiTest) execute(ctx context.Context) error { return fmt.Errorf("building test driver: %w", err) } - var graph = testing.NewGraph(nil, derivations, nil) + var graph = testing.NewGraph(nil, collections, nil) if err = testing.Initialize(ctx, driver, graph); err != nil { return fmt.Errorf("initializing dataflow tracking: %w", err) } @@ -103,10 +99,10 @@ func (cmd apiTest) execute(ctx context.Context) error { } if scope, err := testing.RunTestCase(ctx, graph, driver, testCase); err != nil { - var path, ptr = scopeToPathAndPtr(config.Directory, scope) + var path, ptr = scopeToPathAndPtr(config.Source, scope) fmt.Println("❌", yellow(path), "failure at step", red(ptr), ":") fmt.Println(err) - failed = append(failed, testCase.Test) + failed = append(failed, testCase.Name) var verify testing.FailedVerifies if errors.As(err, &verify) { @@ -115,8 +111,8 @@ func (cmd apiTest) execute(ctx context.Context) error { } } } else { - var path, _ = scopeToPathAndPtr(config.Directory, testCase.Steps[0].StepScope) - fmt.Println("✔️", path, "::", green(testCase.Test)) + var path, _ = scopeToPathAndPtr(config.Source, testCase.Steps[0].StepScope) + fmt.Println("✔️", path, "::", green(testCase.Name)) } var _, err = tc.ResetState(ctx, &pf.ResetStateRequest{}) @@ -159,7 +155,7 @@ func (cmd apiTest) snapshot(verify testing.FailedVerifies) error { return nil } - var dir = filepath.Join(cmd.Snapshot, verify.Test.Test) + var dir = filepath.Join(cmd.Snapshot, verify.Test.Name) if err := os.MkdirAll(dir, 0755); err != nil { return err } diff --git a/go/flowctl-go/cmd-check.go b/go/flowctl-go/cmd-check.go deleted file mode 100644 index aac3655c86..0000000000 --- a/go/flowctl-go/cmd-check.go +++ /dev/null @@ -1,60 +0,0 @@ -package main - -import ( - "context" - "crypto/rand" - "encoding/base64" - "os" - "path/filepath" - - log "github.com/sirupsen/logrus" - pb "go.gazette.dev/core/broker/protocol" - mbp "go.gazette.dev/core/mainboilerplate" -) - -type cmdCheck struct { - Directory string `long:"directory" default:"." 
description:"Build directory"` - Network string `long:"network" description:"The Docker network that connector containers are given access to."` - Source string `long:"source" required:"true" description:"Catalog source file or URL to build"` - Log mbp.LogConfig `group:"Logging" namespace:"log" env-namespace:"LOG"` - Diagnostics mbp.DiagnosticsConfig `group:"Debug" namespace:"debug" env-namespace:"DEBUG"` -} - -func (cmd cmdCheck) Execute(_ []string) error { - defer mbp.InitDiagnosticsAndRecover(cmd.Diagnostics)() - mbp.InitLog(cmd.Log) - - log.WithFields(log.Fields{ - "config": cmd, - "version": mbp.Version, - "buildDate": mbp.BuildDate, - }).Info("flowctl configuration") - pb.RegisterGRPCDispatcher("local") - - var buildID = newBuildID() - var err = apiBuild{ - BuildID: buildID, - Directory: cmd.Directory, - Source: cmd.Source, - SourceType: "catalog", - FileRoot: "/", - Network: cmd.Network, - TSGenerate: true, - TSCompile: false, - TSPackage: false, - }.execute(context.Background()) - - // Cleanup output database. - defer func() { _ = os.Remove(filepath.Join(cmd.Directory, buildID)) }() - - return err -} - -func newBuildID() string { - var data [9]byte - var _, err = rand.Read(data[:]) - if err != nil { - panic(err) - } - return base64.URLEncoding.EncodeToString(data[:]) -} diff --git a/go/flowctl-go/cmd-deploy.go b/go/flowctl-go/cmd-deploy.go deleted file mode 100644 index 69ec0497d8..0000000000 --- a/go/flowctl-go/cmd-deploy.go +++ /dev/null @@ -1,113 +0,0 @@ -package main - -import ( - "context" - "fmt" - "os" - "os/exec" - "os/signal" - "path/filepath" - "syscall" - - log "github.com/sirupsen/logrus" - pb "go.gazette.dev/core/broker/protocol" - mbp "go.gazette.dev/core/mainboilerplate" -) - -type cmdDeploy struct { - Broker mbp.ClientConfig `group:"Broker" namespace:"broker" env-namespace:"BROKER"` - Consumer mbp.ClientConfig `group:"Consumer" namespace:"consumer" env-namespace:"CONSUMER"` - Directory string `long:"directory" default:"." description:"Build directory"` - Network string `long:"network" description:"The Docker network that connector containers are given access to."` - Source string `long:"source" required:"true" description:"Catalog source file or URL to build"` - Cleanup bool `long:"wait-and-cleanup" description:"Keep running after deploy until Ctrl-C. Then, delete the deployment from the dataplane."` - Log mbp.LogConfig `group:"Logging" namespace:"log" env-namespace:"LOG"` - Diagnostics mbp.DiagnosticsConfig `group:"Debug" namespace:"debug" env-namespace:"DEBUG"` -} - -func (cmd cmdDeploy) Execute(_ []string) (retErr error) { - defer mbp.InitDiagnosticsAndRecover(cmd.Diagnostics)() - mbp.InitLog(cmd.Log) - - log.WithFields(log.Fields{ - "config": cmd, - "version": mbp.Version, - "buildDate": mbp.BuildDate, - }).Info("flowctl configuration") - pb.RegisterGRPCDispatcher("local") - - var err error - if cmd.Directory, err = filepath.Abs(cmd.Directory); err != nil { - return fmt.Errorf("filepath.Abs: %w", err) - } - - buildsRoot, err := getBuildsRoot(context.Background(), cmd.Consumer) - if err != nil { - return fmt.Errorf("fetching builds root: %w", err) - } else if buildsRoot.Scheme != "file" { - return fmt.Errorf("this action currently only supports local data planes. See `api activate` instead") - } - - // Build into a new database. 
- var buildID = newBuildID() - if err := (apiBuild{ - BuildID: buildID, - Directory: cmd.Directory, - FileRoot: "/", - Network: cmd.Network, - Source: cmd.Source, - SourceType: "catalog", - TSPackage: true, - }.execute(context.Background())); err != nil { - return err - } - - // Move the build database into the data plane temp directory. - // Shell to `mv` (vs os.Rename) for it's proper handling of cross-volume moves. - if err := exec.Command("mv", - filepath.Join(cmd.Directory, buildID), - filepath.Join(buildsRoot.Path, buildID), - ).Run(); err != nil { - return fmt.Errorf("moving build to local data plane builds root: %w", err) - } - - // Activate the built database into the data plane. - var activate = apiActivate{ - Broker: cmd.Broker, - Consumer: cmd.Consumer, - BuildID: buildID, - Network: cmd.Network, - InitialSplits: 1, - All: true, - } - if err = activate.execute(context.Background()); err != nil { - return err - } - - if !cmd.Cleanup { - return nil // All done. - } - - // Install a signal handler which will cancel our context. - var sigCh = make(chan os.Signal, 1) - signal.Notify(sigCh, syscall.SIGTERM, syscall.SIGINT) - - fmt.Println("Deployment done. Waiting for Ctrl-C to clean up and exit.") - <-sigCh - fmt.Println("Signaled to exit. Cleaning up deployment.") - - // Delete derivations and collections from the local dataplane. - var delete = apiDelete{ - Broker: cmd.Broker, - Consumer: cmd.Consumer, - BuildID: buildID, - Network: cmd.Network, - All: true, - } - if err = delete.execute(context.Background()); err != nil { - return err - } - - fmt.Println("All done.") - return nil -} diff --git a/go/flowctl-go/cmd-discover.go b/go/flowctl-go/cmd-discover.go deleted file mode 100644 index 5216f80bd5..0000000000 --- a/go/flowctl-go/cmd-discover.go +++ /dev/null @@ -1,364 +0,0 @@ -package main - -import ( - "bytes" - "context" - "database/sql" - "encoding/json" - "fmt" - "io" - "io/ioutil" - "os" - "path" - "path/filepath" - "sort" - "strings" - "time" - - "github.com/estuary/flow/go/bindings" - "github.com/estuary/flow/go/flow" - "github.com/estuary/flow/go/protocols/catalog" - pf "github.com/estuary/flow/go/protocols/flow" - log "github.com/sirupsen/logrus" - pb "go.gazette.dev/core/broker/protocol" - mbp "go.gazette.dev/core/mainboilerplate" - "gopkg.in/yaml.v3" -) - -type cmdDiscover struct { - Log mbp.LogConfig `group:"Logging" namespace:"log" env-namespace:"LOG"` - Diagnostics mbp.DiagnosticsConfig `group:"Debug" namespace:"debug" env-namespace:"DEBUG"` - Image string `long:"image" required:"true" description:"Docker image of the connector to use"` - Network string `long:"network" description:"The Docker network that connector containers are given access to."` - Prefix string `long:"prefix" default:"acmeCo" description:"Prefix of generated catalog entities. For example, an organization or company name."` - Directory string `long:"directory" description:"Output directory for catalog source files. 
Defaults to --prefix"` -} - -func (cmd cmdDiscover) Execute(_ []string) error { - defer mbp.InitDiagnosticsAndRecover(cmd.Diagnostics)() - mbp.InitLog(cmd.Log) - - var ctx, cancelFunc = context.WithTimeout(context.Background(), time.Second*30) - - defer cancelFunc() - - log.WithFields(log.Fields{ - "config": cmd, - "version": mbp.Version, - "buildDate": mbp.BuildDate, - }).Info("flowctl configuration") - pb.RegisterGRPCDispatcher("local") - - var imageParts = strings.Split(cmd.Image, "/") - var connectorName = strings.Split(imageParts[len(imageParts)-1], ":")[0] - - // Directory defaults to --prefix. - if cmd.Directory == "" { - cmd.Directory = cmd.Prefix - } - - if err := os.MkdirAll(cmd.Directory, 0755); err != nil { - return fmt.Errorf("creating output directory: %w", err) - } else if cmd.Directory, err = filepath.Abs(cmd.Directory); err != nil { - return fmt.Errorf("getting absolute directory: %w", err) - } - - var configName = fmt.Sprintf("%s.config.yaml", connectorName) - var configPath = filepath.Join(cmd.Directory, configName) - var catalogPath = filepath.Join(cmd.Directory, fmt.Sprintf("%s.flow.yaml", connectorName)) - - // If the configuration file doesn't exist, write it as a stub. - if w, err := os.OpenFile(configPath, os.O_CREATE|os.O_EXCL|os.O_WRONLY, 0600); err == nil { - fmt.Printf(` -Creating a connector configuration stub at %s. -Edit and update this file, and then run this command again. -`, configPath) - - if err = cmd.writeConfigStub(ctx, w); err != nil { - _ = os.Remove(configPath) // Don't leave an empty file behind. - } - return err - } else if !os.IsExist(err) { - return err - } - - // Discover bindings and write the output catalog. - discovered, err := apiDiscover{ - Log: cmd.Log, - Diagnostics: cmd.Diagnostics, - Image: cmd.Image, - Network: cmd.Network, - Config: configPath, - Output: "", // Not required. - }.execute(ctx) - if err != nil { - return err - } else if err := discovered.Validate(); err != nil { - return err - } - - type Collection struct { - Schema string - Key []string `yaml:",flow"` - } - var collections = make(map[string]Collection) - - type Binding struct { - Resource interface{} `yaml:"resource"` - Target string `yaml:"target"` - } - type Capture struct { - Endpoint struct { - Spec struct { - Image string `yaml:"image"` - Config string `yaml:"config"` - } `yaml:"connector"` - } `yaml:"endpoint"` - Bindings []Binding `yaml:"bindings"` - } - var capture Capture - var hasEmptyKeys bool - - capture.Endpoint.Spec.Image = cmd.Image - capture.Endpoint.Spec.Config = configName - - for _, b := range discovered.Bindings { - var collection = path.Join(cmd.Prefix, b.RecommendedName.String()) - var schemaName = fmt.Sprintf("%s.schema.yaml", b.RecommendedName) - var outputPath = filepath.Join(cmd.Directory, schemaName) - var outputDir = path.Dir(outputPath) - - if err := os.MkdirAll(outputDir, 0755); err != nil { - return fmt.Errorf("creating output directory: %w", err) - } - - var schema, resource interface{} - if err := json.Unmarshal(b.DocumentSchemaJson, &schema); err != nil { - return fmt.Errorf("decoding schema of %s: %w", collection, err) - } else if err = json.Unmarshal(b.ResourceSpecJson, &resource); err != nil { - return fmt.Errorf("decoding resource of %s: %w", collection, err) - } - - // Write out schema file. 
- var schemaBytes bytes.Buffer - var enc = yaml.NewEncoder(&schemaBytes) - enc.SetIndent(2) - if err := enc.Encode(schema); err != nil { - return fmt.Errorf("encoding schema: %w", err) - } else if err = enc.Close(); err != nil { - return fmt.Errorf("encoding schema: %w", err) - } else if err = ioutil.WriteFile(filepath.Join(cmd.Directory, schemaName), schemaBytes.Bytes(), 0644); err != nil { - return fmt.Errorf("writing schema: %w", err) - } - - collections[collection] = Collection{ - Key: b.KeyPtrs, - Schema: schemaName, - } - capture.Bindings = append(capture.Bindings, Binding{ - Target: collection, - Resource: resource, - }) - - if len(b.KeyPtrs) == 0 { - hasEmptyKeys = true - } - } - - w, err := os.Create(catalogPath) - if err != nil { - return fmt.Errorf("opening output catalog: %w", err) - } - var enc = yaml.NewEncoder(w) - enc.SetIndent(2) - - if err = enc.Encode(struct { - Collections map[string]Collection - Captures map[string]Capture - }{ - collections, - map[string]Capture{ - path.Join(cmd.Prefix, connectorName): capture, - }, - }); err == nil { - err = enc.Close() - } - if err == nil { - err = w.Close() - } - - if err != nil { - return fmt.Errorf("writing output catalog: %w", err) - } - - fmt.Printf(` -Created a Flow catalog at %s -with discovered collections and capture bindings. -`, catalogPath) - - if hasEmptyKeys { - fmt.Print(` -A native key couldn't be determined for all collections. -You must manually add appropriate keys, and update associated collection schemas -(for example, by marking corresponding properties as "required"). -`) - } - - return nil -} - -func (cmd cmdDiscover) writeConfigStub(ctx context.Context, w io.WriteCloser) error { - var spec = apiSpec{ - Log: cmd.Log, - Diagnostics: cmd.Diagnostics, - Image: cmd.Image, - Network: cmd.Network, - } - - var resp, err = spec.execute(ctx) - if err != nil { - return fmt.Errorf("querying connector spec: %w", err) - } - - // TODO(johnny): Factor out into a schema tool. - tmpdir, err := ioutil.TempDir("", "flow-discover") - if err != nil { - return fmt.Errorf("creating temp directory: %w", err) - } - defer os.RemoveAll(tmpdir) - - var tmpfile = filepath.Join(tmpdir, "schema.yaml") - mbp.Must(ioutil.WriteFile(tmpfile, resp.EndpointSpecSchema, 0600), "writing spec") - - // Build the schema - var buildConfig = pf.BuildAPI_Config{ - BuildId: newBuildID(), - Directory: tmpdir, - Source: tmpfile, - SourceType: pf.ContentType_JSON_SCHEMA, - } - // Cleanup output database. - defer func() { _ = os.Remove(buildConfig.OutputPath()) }() - - if err = bindings.BuildCatalog(bindings.BuildArgs{ - Context: ctx, - BuildAPI_Config: buildConfig, - FileRoot: "/", - }); err != nil { - return fmt.Errorf("building schema catalog: %w", err) - } - - // Load extracted schema locations. - var locations []catalog.SchemaLocation - if err = catalog.Extract(buildConfig.OutputPath(), func(db *sql.DB) error { - if locations, err = catalog.LoadAllInferences(db); err != nil { - return fmt.Errorf("loading inferences: %w", err) - } - return nil - }); err != nil { - return err - } - - var config interface{} - - // Visit leaf-most schema locations first. - // Because we're creating yaml.Nodes instead of []interface{} - // or map[string]interface{}, ptr.Create() is unable to create - // a sub-location after visiting its parent. 
-	sort.Slice(locations, func(i int, j int) bool {
-		return len(locations[i].Location) > len(locations[j].Location)
-	})
-
-	for _, loc := range locations {
-		if ptr, err := flow.NewPointer(loc.Location); err != nil {
-			return fmt.Errorf("build pointer: %w", err)
-		} else if node, err := ptr.Create(&config); err != nil {
-			return fmt.Errorf("creating location %q: %w", loc.Location, err)
-		} else if *node == nil {
-			var nn, err = buildStubNode(&loc.Spec)
-			if err != nil {
-				return fmt.Errorf("location %s: %w", loc.Location, err)
-			}
-			*node = nn
-		}
-	}
-
-	var enc = yaml.NewEncoder(w)
-	enc.SetIndent(2)
-
-	if err = enc.Encode(config); err == nil {
-		err = w.Close()
-	}
-	if err != nil {
-		return fmt.Errorf("writing config: %w", err)
-	}
-
-	return nil
-}
-
-// getDefaultType returns the type to use for generating a default value for endpoint configuration.
-// It will always prefer a scalar type if a location allows multiple types.
-func getDefaultType(inference *pf.Inference) string {
-	var fallback string
-	for _, ty := range inference.Types {
-		if ty == pf.JsonTypeString || ty == pf.JsonTypeBoolean || ty == pf.JsonTypeInteger || ty == pf.JsonTypeNumber {
-			return ty
-		} else {
-			fallback = ty
-		}
-	}
-	return fallback
-}
-
-func buildStubNode(inference *pf.Inference) (*yaml.Node, error) {
-	var node = new(yaml.Node)
-
-	if len(inference.DefaultJson) != 0 {
-		if err := yaml.NewDecoder(bytes.NewReader(inference.DefaultJson)).Decode(node); err != nil {
-			return nil, fmt.Errorf("decoding schema `default` value: %w", err)
-		}
-		node = node.Content[0] // Unwrap root document node.
-	} else {
-		// The explicit tags are necessary for the encoder to know how to render these. They
-		// will not be included in the final output.
-		switch getDefaultType(inference) {
-		case pf.JsonTypeString:
-			node.SetString("")
-		case pf.JsonTypeInteger:
-			node.Value = "0"
-			node.Tag = "!!int"
-		case pf.JsonTypeNumber:
-			node.Value = "0.0"
-			node.Tag = "!!float"
-		case pf.JsonTypeBoolean:
-			node.Value = "false"
-			node.Tag = "!!bool"
-		case pf.JsonTypeObject:
-			node.Value = "{}"
-			node.Tag = "!!map"
-		case pf.JsonTypeArray:
-			node.Value = "[]"
-			node.Tag = "!!seq"
-		case pf.JsonTypeNull:
-			node.Tag = "!!null"
-		}
-
-		// Required to get numbers and booleans to render correctly (?).
-		node.Kind = yaml.ScalarNode
-	}
-
-	// Renders values inline with keys, instead of on the next line.
-	node.Style = yaml.FlowStyle
-
-	node.FootComment =
-		fmt.Sprintf("%s\n%s", inference.Description, inference.Types)
-
-	if inference.Exists == pf.Inference_MUST {
-		node.FootComment += " (required)"
-	}
-	if inference.Secret {
-		node.FootComment += " (secret)"
-	}
-
-	return node, nil
-}
diff --git a/go/flowctl-go/cmd-shards-split.go b/go/flowctl-go/cmd-shards-split.go
index 6f64df5fd3..beda92fa85 100644
--- a/go/flowctl-go/cmd-shards-split.go
+++ b/go/flowctl-go/cmd-shards-split.go
@@ -9,6 +9,7 @@ import (
 	"github.com/estuary/flow/go/labels"
 	"github.com/estuary/flow/go/protocols/catalog"
 	pf "github.com/estuary/flow/go/protocols/flow"
+	"github.com/estuary/flow/go/protocols/ops"
 	log "github.com/sirupsen/logrus"
 	"go.gazette.dev/core/broker/client"
 	pb "go.gazette.dev/core/broker/protocol"
@@ -83,15 +84,15 @@ func (cmd cmdShardsSplit) execute(ctx context.Context) error {
 	var task pf.Task
 	if err := build.Extract(func(db *sql.DB) error {
 		switch labeling.TaskType {
-		case labels.TaskTypeCapture:
+		case ops.TaskType_capture:
 			capture, err := catalog.LoadCapture(db, labeling.TaskName)
 			task = capture
 			return err
-		case labels.TaskTypeDerivation:
-			derivation, err := catalog.LoadDerivation(db, labeling.TaskName)
+		case ops.TaskType_derivation:
+			derivation, err := catalog.LoadCollection(db, labeling.TaskName)
 			task = derivation
 			return err
-		case labels.TaskTypeMaterialization:
+		case ops.TaskType_materialization:
 			materialization, err := catalog.LoadMaterialization(db, labeling.TaskName)
 			task = materialization
 			return err
diff --git a/go/flowctl-go/cmd-temp-data-plane.go b/go/flowctl-go/cmd-temp-data-plane.go
index dc8caa050f..04e7e452af 100644
--- a/go/flowctl-go/cmd-temp-data-plane.go
+++ b/go/flowctl-go/cmd-temp-data-plane.go
@@ -130,12 +130,10 @@ func (cmd cmdTempDataPlane) etcdCmd(ctx context.Context, tempdir string) (*exec.
 		"--data-dir", filepath.Join(tempdir, "data-plane.etcd"),
 		"--listen-client-urls", "unix://client.sock:0",
 		"--listen-peer-urls", "unix://peer.sock:0",
+		"--log-level", "error",
+		"--logger", "zap",
 		"--name", "data-plane",
 	)
-	// The Etcd --log-level flag was added in v3.4. Use its environment variable
-	// version to remain compatible with older `etcd` binaries.
-	out.Env = append(out.Env, "ETCD_LOG_LEVEL=error", "ETCD_LOGGER=zap")
-
 	out.Env = append(out.Env, os.Environ()...)
 	out.Dir = tempdir
 	out.Stdout = os.Stdout
diff --git a/go/flowctl-go/cmd-test.go b/go/flowctl-go/cmd-test.go
index fc4948e8cc..f4bdcb5845 100644
--- a/go/flowctl-go/cmd-test.go
+++ b/go/flowctl-go/cmd-test.go
@@ -5,7 +5,6 @@ import (
 	"fmt"
 	"io/ioutil"
 	"os"
-	"os/exec"
 	"os/signal"
 	"path/filepath"
 	"syscall"
@@ -16,7 +15,6 @@ import (
 )

 type cmdTest struct {
-	Directory string `long:"directory" default:"." description:"Build directory"`
 	Network string `long:"network" description:"The Docker network that connector containers are given access to."`
 	Source string `long:"source" required:"true" description:"Catalog source file or URL to build"`
 	Snapshot string `long:"snapshot" description:"When set, failed test verifications produce snapshots into the given base directory"`
@@ -35,11 +33,6 @@ func (cmd cmdTest) Execute(_ []string) (retErr error) {
 	}).Info("flowctl configuration")
 	protocol.RegisterGRPCDispatcher("local")

-	var err error
-	if cmd.Directory, err = filepath.Abs(cmd.Directory); err != nil {
-		return fmt.Errorf("filepath.Abs: %w", err)
-	}
-
 	// Create a temporary directory which will contain the Etcd database
 	// and various unix:// sockets.
 	tempdir, err := ioutil.TempDir("", "flow-test")
@@ -65,31 +58,20 @@ func (cmd cmdTest) Execute(_ []string) (retErr error) {
 		return fmt.Errorf("starting local data plane: %w", err)
 	}

-	// Build into a new database. Arrange to clean it up on exit.
-	var buildID = newBuildID()
-	defer func() { _ = os.Remove(filepath.Join(cmd.Directory, buildID)) }()
+	var buildID = "test-build-id"

 	if err := (apiBuild{
-		BuildID: buildID,
-		Directory: cmd.Directory,
+		BuildID: buildID,
+		// Build directly into the temp dataplane's build directory.
+		BuildDB: filepath.Join(tempdir, "builds", buildID),
 		FileRoot: "/",
 		Network: cmd.Network,
 		Source: cmd.Source,
 		SourceType: "catalog",
-		TSPackage: true,
 	}.execute(ctx)); err != nil {
 		return err
 	}

-	// Move the build database into the data plane temp directory.
-	// Shell to `mv` (vs os.Rename) for its proper handling of cross-volume moves.
-	if err := exec.Command("mv",
-		filepath.Join(cmd.Directory, buildID),
-		filepath.Join(tempdir, "builds", buildID),
-	).Run(); err != nil {
-		return fmt.Errorf("moving build to local data plane builds root: %w", err)
-	}
-
 	// Activate derivations of the built database into the local dataplane.
 	var activate = apiActivate{
 		BuildID: buildID,
diff --git a/go/flowctl-go/main.go b/go/flowctl-go/main.go
index 15cc65e587..6af7848786 100644
--- a/go/flowctl-go/main.go
+++ b/go/flowctl-go/main.go
@@ -16,29 +16,6 @@ func main() {
 Locally test a Flow catalog.
 `, &cmdTest{})

-	addCmd(parser, "check", "Check a Flow catalog for errors", `
-Quickly load and validate a Flow catalog, and generate updated TypeScript types.
-`, &cmdCheck{})
-
-	addCmd(parser, "discover", "Discover available captures of an endpoint", `
-Inspect a configured endpoint, and generate a Flow catalog of collections,
-schemas, and capture bindings which reflect its available resources.
-
-Discover is a two-stage workflow:
-
-In the first invocation, the command will generate a stub
-configuration YAML derived from the connector's specification.
-The user reviews this YAML file, and updates it with appropriate
-credentials and configuration.
-
-In the second invocation, the command applies the completed
-configuration to the endpoint and determines its available resource
-bindings. It generates a Flow catalog YAML file with a Flow Capture
-and associated Collection definitions. The user may then review,
-update, refactor, and otherwise incorporate the generated entities
-into their broader Flow catalog.
-`, &cmdDiscover{})
-
 	addCmd(parser, "json-schema", "Print the catalog JSON schema", `
 Print the JSON schema specification of Flow catalogs, as understood by this
 specific build of Flow. This JSON schema can be used to enable IDE support
@@ -52,14 +29,6 @@ fragments to the configured storage mappings of collections and Flow tasks.
 Upon exit, all data is discarded.
 `, &cmdTempDataPlane{})

-	addCmd(parser, "deploy", "Build a catalog and deploy it to a data plane", `
-Build a catalog from --source. Then, activate it into a data plane.
-
-If --block-and-cleanup, then await a Ctrl-C from the user and then fully remove
-the deployment, cleaning up all its effects and restoring the data plane to
-its original state.
-`, &cmdDeploy{})
-
 	serve, err := parser.Command.AddCommand("serve", "Serve a component of Flow", "", &struct{}{})
 	mbp.Must(err, "failed to add command")
diff --git a/go/materialize/driver/sqlite/.snapshots/TestSQLiteDriver b/go/materialize/driver/sqlite/.snapshots/TestSQLiteDriver
index 5085d33be0..dfda9d05bd 100644
--- a/go/materialize/driver/sqlite/.snapshots/TestSQLiteDriver
+++ b/go/materialize/driver/sqlite/.snapshots/TestSQLiteDriver
@@ -7,5 +7,5 @@ key4Value, , , , { "theKey": "key4Value" }

 flow_checkpoints_v1:
 materialization, key_begin, key_end, fence, checkpoint
-a/materialization, 0, 4294967295, 6, aW5pdGlhbCBjaGVja3BvaW50IGZpeHR1cmU=
-a/materialization, 100, 200, 6, dGhpcmQgY2hlY2twb2ludCB2YWx1ZQ==
+a/materialization, 0, 4294967295, 6, Cg8KCWEvam91cm5hbBICCAE=
+a/materialization, 100, 200, 6, ChAKCWEvam91cm5hbBIDCM0C
diff --git a/go/materialize/driver/sqlite/.snapshots/TestValidations-NewSQLProjections-optional-multi-types b/go/materialize/driver/sqlite/.snapshots/TestValidations-NewSQLProjections-optional-multi-types
index ebf21359ac..7164260567 100644
--- a/go/materialize/driver/sqlite/.snapshots/TestValidations-NewSQLProjections-optional-multi-types
+++ b/go/materialize/driver/sqlite/.snapshots/TestValidations-NewSQLProjections-optional-multi-types
@@ -1,13 +1,13 @@
-(map[string]*materialize.Constraint) (len=11) {
- (string) (len=3) "any": (*materialize.Constraint)(type:FIELD_FORBIDDEN reason:"Cannot materialize this field" ),
- (string) (len=17) "boolOrArrayOrNull": (*materialize.Constraint)(type:FIELD_FORBIDDEN reason:"Cannot materialize this field" ),
- (string) (len=12) "boolOrString": (*materialize.Constraint)(type:FIELD_FORBIDDEN reason:"Cannot materialize this field" ),
- (string) (len=13) "flow_document": (*materialize.Constraint)(type:LOCATION_REQUIRED reason:"The root document must be materialized" ),
- (string) (len=18) "intDifferentRanges": (*materialize.Constraint)(type:LOCATION_RECOMMENDED reason:"The projection has a single scalar type" ),
- (string) (len=8) "intOrNum": (*materialize.Constraint)(type:LOCATION_RECOMMENDED reason:"The projection has a single scalar type" ),
- (string) (len=14) "intOrNumOrNull": (*materialize.Constraint)(type:LOCATION_RECOMMENDED reason:"The projection has a single scalar type" ),
- (string) (len=25) "intOrNumOverlappingRanges": (*materialize.Constraint)(type:LOCATION_RECOMMENDED reason:"The projection has a single scalar type" ),
- (string) (len=17) "intOrObjectOrNull": (*materialize.Constraint)(type:FIELD_FORBIDDEN reason:"Cannot materialize this field" ),
- (string) (len=11) "stringOrInt": (*materialize.Constraint)(type:FIELD_FORBIDDEN reason:"Cannot materialize this field" ),
- (string) (len=6) "theKey": (*materialize.Constraint)(type:LOCATION_REQUIRED reason:"All Locations that are part of the collections key are required" )
+(map[string]*materialize.Response_Validated_Constraint) (len=11) {
+ (string) (len=3) "any": (*materialize.Response_Validated_Constraint)(type:FIELD_FORBIDDEN reason:"Cannot materialize this field" ),
+ (string) (len=17) "boolOrArrayOrNull": (*materialize.Response_Validated_Constraint)(type:FIELD_FORBIDDEN reason:"Cannot materialize this field" ),
+ (string) (len=12) "boolOrString": (*materialize.Response_Validated_Constraint)(type:FIELD_FORBIDDEN reason:"Cannot materialize this field" ),
+ (string) (len=13) "flow_document": (*materialize.Response_Validated_Constraint)(type:LOCATION_REQUIRED reason:"The root document must be materialized" ),
+ (string) (len=18) "intDifferentRanges": (*materialize.Response_Validated_Constraint)(type:LOCATION_RECOMMENDED reason:"The projection has a single scalar type" ),
+ (string) (len=8) "intOrNum": (*materialize.Response_Validated_Constraint)(type:LOCATION_RECOMMENDED reason:"The projection has a single scalar type" ),
+ (string) (len=14) "intOrNumOrNull": (*materialize.Response_Validated_Constraint)(type:LOCATION_RECOMMENDED reason:"The projection has a single scalar type" ),
+ (string) (len=25) "intOrNumOverlappingRanges": (*materialize.Response_Validated_Constraint)(type:LOCATION_RECOMMENDED reason:"The projection has a single scalar type" ),
+ (string) (len=17) "intOrObjectOrNull": (*materialize.Response_Validated_Constraint)(type:FIELD_FORBIDDEN reason:"Cannot materialize this field" ),
+ (string) (len=11) "stringOrInt": (*materialize.Response_Validated_Constraint)(type:FIELD_FORBIDDEN reason:"Cannot materialize this field" ),
+ (string) (len=6) "theKey": (*materialize.Response_Validated_Constraint)(type:LOCATION_REQUIRED reason:"All Locations that are part of the collections key are required" )
 }
diff --git a/go/materialize/driver/sqlite/.snapshots/TestValidations-NewSQLProjections-optionals b/go/materialize/driver/sqlite/.snapshots/TestValidations-NewSQLProjections-optionals
index 06f4ad39a4..daf7e74e63 100644
--- a/go/materialize/driver/sqlite/.snapshots/TestValidations-NewSQLProjections-optionals
+++ b/go/materialize/driver/sqlite/.snapshots/TestValidations-NewSQLProjections-optionals
@@ -1,10 +1,10 @@
-(map[string]*materialize.Constraint) (len=8) {
- (string) (len=5) "array": (*materialize.Constraint)(type:FIELD_OPTIONAL reason:"This field is able to be materialized" ),
- (string) (len=4) "bool": (*materialize.Constraint)(type:LOCATION_RECOMMENDED reason:"The projection has a single scalar type" ),
- (string) (len=13) "flow_document": (*materialize.Constraint)(type:LOCATION_REQUIRED reason:"The root document must be materialized" ),
- (string) (len=3) "int": (*materialize.Constraint)(type:LOCATION_RECOMMENDED reason:"The projection has a single scalar type" ),
- (string) (len=6) "number": (*materialize.Constraint)(type:LOCATION_RECOMMENDED reason:"The projection has a single scalar type" ),
- (string) (len=6) "object": (*materialize.Constraint)(type:FIELD_OPTIONAL reason:"This field is able to be materialized" ),
- (string) (len=6) "string": (*materialize.Constraint)(type:LOCATION_RECOMMENDED reason:"The projection has a single scalar type" ),
- (string) (len=6) "theKey": (*materialize.Constraint)(type:LOCATION_REQUIRED reason:"All Locations that are part of the collections key are required" )
+(map[string]*materialize.Response_Validated_Constraint) (len=8) {
+ (string) (len=5) "array": (*materialize.Response_Validated_Constraint)(type:FIELD_OPTIONAL reason:"This field is able to be materialized" ),
+ (string) (len=4) "bool": (*materialize.Response_Validated_Constraint)(type:LOCATION_RECOMMENDED reason:"The projection has a single scalar type" ),
+ (string) (len=13) "flow_document": (*materialize.Response_Validated_Constraint)(type:LOCATION_REQUIRED reason:"The root document must be materialized" ),
+ (string) (len=3) "int": (*materialize.Response_Validated_Constraint)(type:LOCATION_RECOMMENDED reason:"The projection has a single scalar type" ),
+ (string) (len=6) "number": (*materialize.Response_Validated_Constraint)(type:LOCATION_RECOMMENDED reason:"The projection has a single scalar type" ),
+ (string) (len=6) "object": (*materialize.Response_Validated_Constraint)(type:FIELD_OPTIONAL reason:"This field is able to be materialized" ),
+ (string) (len=6) "string": (*materialize.Response_Validated_Constraint)(type:LOCATION_RECOMMENDED reason:"The projection has a single scalar type" ),
+ (string) (len=6) "theKey": (*materialize.Response_Validated_Constraint)(type:LOCATION_REQUIRED reason:"All Locations that are part of the collections key are required" )
 }
diff --git a/go/materialize/driver/sqlite/.snapshots/TestValidations-NewSQLProjections-required-nullable b/go/materialize/driver/sqlite/.snapshots/TestValidations-NewSQLProjections-required-nullable
index f8273f7a60..14f410d526 100644
--- a/go/materialize/driver/sqlite/.snapshots/TestValidations-NewSQLProjections-required-nullable
+++ b/go/materialize/driver/sqlite/.snapshots/TestValidations-NewSQLProjections-required-nullable
@@ -1,11 +1,11 @@
-(map[string]*materialize.Constraint) (len=9) {
- (string) (len=5) "array": (*materialize.Constraint)(type:FIELD_OPTIONAL reason:"This field is able to be materialized" ),
- (string) (len=7) "boolean": (*materialize.Constraint)(type:LOCATION_RECOMMENDED reason:"The projection has a single scalar type" ),
- (string) (len=13) "flow_document": (*materialize.Constraint)(type:LOCATION_REQUIRED reason:"The root document must be materialized" ),
- (string) (len=7) "integer": (*materialize.Constraint)(type:LOCATION_RECOMMENDED reason:"The projection has a single scalar type" ),
- (string) (len=4) "null": (*materialize.Constraint)(type:FIELD_FORBIDDEN reason:"Cannot materialize this field" ),
- (string) (len=6) "number": (*materialize.Constraint)(type:LOCATION_RECOMMENDED reason:"The projection has a single scalar type" ),
- (string) (len=6) "object": (*materialize.Constraint)(type:FIELD_OPTIONAL reason:"This field is able to be materialized" ),
- (string) (len=6) "string": (*materialize.Constraint)(type:LOCATION_RECOMMENDED reason:"The projection has a single scalar type" ),
- (string) (len=6) "theKey": (*materialize.Constraint)(type:LOCATION_REQUIRED reason:"All Locations that are part of the collections key are required" )
+(map[string]*materialize.Response_Validated_Constraint) (len=9) {
+ (string) (len=5) "array": (*materialize.Response_Validated_Constraint)(type:FIELD_OPTIONAL reason:"This field is able to be materialized" ),
+ (string) (len=7) "boolean": (*materialize.Response_Validated_Constraint)(type:LOCATION_RECOMMENDED reason:"The projection has a single scalar type" ),
+ (string) (len=13) "flow_document": (*materialize.Response_Validated_Constraint)(type:LOCATION_REQUIRED reason:"The root document must be materialized" ),
+ (string) (len=7) "integer": (*materialize.Response_Validated_Constraint)(type:LOCATION_RECOMMENDED reason:"The projection has a single scalar type" ),
+ (string) (len=4) "null": (*materialize.Response_Validated_Constraint)(type:FIELD_FORBIDDEN reason:"Cannot materialize this field" ),
+ (string) (len=6) "number": (*materialize.Response_Validated_Constraint)(type:LOCATION_RECOMMENDED reason:"The projection has a single scalar type" ),
+ (string) (len=6) "object": (*materialize.Response_Validated_Constraint)(type:FIELD_OPTIONAL reason:"This field is able to be materialized" ),
+ (string) (len=6) "string": (*materialize.Response_Validated_Constraint)(type:LOCATION_RECOMMENDED reason:"The projection has a single scalar type" ),
+ (string) (len=6) "theKey": (*materialize.Response_Validated_Constraint)(type:LOCATION_REQUIRED reason:"All Locations that are part of the collections key are required" )
 }
diff --git a/go/materialize/driver/sqlite/sqlite.go b/go/materialize/driver/sqlite/sqlite.go
index 7b95bde1db..8afe78d609 100644
--- a/go/materialize/driver/sqlite/sqlite.go
+++ b/go/materialize/driver/sqlite/sqlite.go
@@ -15,6 +15,7 @@ import (
 	_ "github.com/mattn/go-sqlite3" // Import for register side-effects.
 	log "github.com/sirupsen/logrus"
 	pb "go.gazette.dev/core/broker/protocol"
+	pc "go.gazette.dev/core/consumer/protocol"
 	"go.gazette.dev/core/server"
 	"go.gazette.dev/core/task"
 )
@@ -167,7 +168,7 @@ func NewInProcessServer(ctx context.Context) (*InProcessServer, error) {
 	var group = task.NewGroup(pb.WithDispatchDefault(ctx))
 	var server = server.MustLoopback()

-	pm.RegisterDriverServer(server.GRPCServer, NewSQLiteDriver())
+	pm.RegisterConnectorServer(server.GRPCServer, NewSQLiteDriver())
 	server.QueueTasks(group)
 	group.GoRun()

@@ -178,8 +179,8 @@ func NewInProcessServer(ctx context.Context) (*InProcessServer, error) {
 	}, nil
 }

-func (s *InProcessServer) Client() pm.DriverClient {
-	return pm.NewDriverClient(s.server.GRPCLoopback)
+func (s *InProcessServer) Client() pm.ConnectorClient {
+	return pm.NewConnectorClient(s.server.GRPCLoopback)
 }

 func (s *InProcessServer) Stop() error {
@@ -378,7 +379,7 @@ func (d *transactor) Store(it *pm.StoreIterator) (pm.StartCommitFunc, error) {
 		}
 	}

-	return func(ctx context.Context, runtimeCheckpoint []byte, _ <-chan struct{}) (*pf.DriverCheckpoint, pf.OpFuture) {
+	return func(ctx context.Context, runtimeCheckpoint *pc.Checkpoint, _ <-chan struct{}) (*pf.ConnectorState, pf.OpFuture) {
 		d.store.fence.SetCheckpoint(runtimeCheckpoint)
 		return nil, pf.RunAsyncOperation(func() error { return commitTxn(ctx, txn, d.store.fence) })
 	}, nil
diff --git a/go/materialize/driver/sqlite/sqlite_test.go b/go/materialize/driver/sqlite/sqlite_test.go
index a9f443fa42..828de7f36f 100644
--- a/go/materialize/driver/sqlite/sqlite_test.go
+++ b/go/materialize/driver/sqlite/sqlite_test.go
@@ -21,6 +21,7 @@ import (
 	sqlDriver "github.com/estuary/flow/go/protocols/materialize/sql"
 	"github.com/stretchr/testify/require"
 	pb "go.gazette.dev/core/broker/protocol"
+	pc "go.gazette.dev/core/consumer/protocol"
 )

 func TestSQLGeneration(t *testing.T) {
@@ -31,7 +32,7 @@ func TestSQLGeneration(t *testing.T) {
 		FileRoot: "./testdata",
 		BuildAPI_Config: pf.BuildAPI_Config{
 			BuildId: "fixture",
-			Directory: t.TempDir(),
+			BuildDb: path.Join(t.TempDir(), "build.db"),
 			Source: "file:///sql-gen.yaml",
 			SourceType: pf.ContentType_CATALOG,
 		},
@@ -39,7 +40,7 @@ func TestSQLGeneration(t *testing.T) {
 	require.NoError(t, bindings.BuildCatalog(args))

 	var spec *pf.MaterializationSpec
-	require.NoError(t, catalog.Extract(args.OutputPath(), func(db *sql.DB) (err error) {
+	require.NoError(t, catalog.Extract(args.BuildDb, func(db *sql.DB) (err error) {
 		spec, err = catalog.LoadMaterialization(db, "test/sqlite")
 		return err
 	}))
@@ -74,17 +75,6 @@ func TestSQLGeneration(t *testing.T) {
 	require.Equal(t, `DELETE FROM load.keys_123
 		;`, keyTruncate)
 }

-func TestSpecification(t *testing.T) {
-	var resp, err = sqlite.NewSQLiteDriver().
-		Spec(context.Background(), &pm.SpecRequest{EndpointType: pf.EndpointType_AIRBYTE_SOURCE})
-	require.NoError(t, err)
-
-	formatted, err := json.MarshalIndent(resp, "", " ")
-	require.NoError(t, err)
-
-	cupaloy.SnapshotT(t, formatted)
-}
-
 func TestSQLiteDriver(t *testing.T) {
 	pb.RegisterGRPCDispatcher("local")

@@ -93,7 +83,7 @@ func TestSQLiteDriver(t *testing.T) {
 		FileRoot: "./testdata",
 		BuildAPI_Config: pf.BuildAPI_Config{
 			BuildId: "fixture",
-			Directory: t.TempDir(),
+			BuildDb: path.Join(t.TempDir(), "build.db"),
 			Source: "file:///driver-steps.yaml",
 			SourceType: pf.ContentType_CATALOG,
 		},
@@ -102,7 +92,7 @@ func TestSQLiteDriver(t *testing.T) {

 	// Model MaterializationSpec we'll *mostly* use, but vary slightly in this test.
 	var model *pf.MaterializationSpec
-	require.NoError(t, catalog.Extract(args.OutputPath(), func(db *sql.DB) (err error) {
+	require.NoError(t, catalog.Extract(args.BuildDb, func(db *sql.DB) (err error) {
 		model, err = catalog.LoadMaterialization(db, "a/materialization")
 		return err
 	}))
@@ -113,6 +103,9 @@ func TestSQLiteDriver(t *testing.T) {
 	var driver = server.Client()
 	var ctx = pb.WithDispatchDefault(context.Background())

+	transaction, err := driver.Materialize(ctx)
+	require.NoError(t, err)
+
 	// Config fixture which matches schema of ParseConfig.
 	var endpointConfig = struct {
 		Path string
@@ -120,35 +113,36 @@ func TestSQLiteDriver(t *testing.T) {
 	var endpointJSON, _ = json.Marshal(endpointConfig)

 	// Validate should return constraints for a non-existent materialization
-	var validateReq = pm.ValidateRequest{
-		Materialization: model.Materialization,
-		EndpointType: pf.EndpointType_SQLITE,
-		EndpointSpecJson: json.RawMessage(endpointJSON),
-		Bindings: []*pm.ValidateRequest_Binding{
+	var validateReq = pm.Request_Validate{
+		Name: model.Name,
+		ConnectorType: pf.MaterializationSpec_SQLITE,
+		ConfigJson: json.RawMessage(endpointJSON),
+		Bindings: []*pm.Request_Validate_Binding{
 			{
-				Collection: model.Bindings[0].Collection,
-				ResourceSpecJson: model.Bindings[0].ResourceSpecJson,
+				Collection: model.Bindings[0].Collection,
+				ResourceConfigJson: model.Bindings[0].ResourceConfigJson,
 			},
 		},
 	}
+	require.NoError(t, transaction.Send(&pm.Request{Validate: &validateReq}))

-	validateResp, err := driver.Validate(ctx, &validateReq)
+	validateResp, err := transaction.Recv()
 	require.NoError(t, err)

 	// There should be a constraint for every projection
-	require.Equal(t, &pm.ValidateResponse_Binding{
-		Constraints: map[string]*pm.Constraint{
-			"array": {Type: pm.Constraint_FIELD_OPTIONAL, Reason: "This field is able to be materialized"},
-			"bool": {Type: pm.Constraint_LOCATION_RECOMMENDED, Reason: "The projection has a single scalar type"},
-			"flow_document": {Type: pm.Constraint_LOCATION_REQUIRED, Reason: "The root document must be materialized"},
-			"int": {Type: pm.Constraint_LOCATION_RECOMMENDED, Reason: "The projection has a single scalar type"},
-			"number": {Type: pm.Constraint_LOCATION_RECOMMENDED, Reason: "The projection has a single scalar type"},
-			"object": {Type: pm.Constraint_FIELD_OPTIONAL, Reason: "This field is able to be materialized"},
-			"string": {Type: pm.Constraint_LOCATION_RECOMMENDED, Reason: "The projection has a single scalar type"},
-			"theKey": {Type: pm.Constraint_LOCATION_REQUIRED, Reason: "All Locations that are part of the collections key are required"},
+	require.Equal(t, &pm.Response_Validated_Binding{
+		Constraints: map[string]*pm.Response_Validated_Constraint{
+			"array": {Type: pm.Response_Validated_Constraint_FIELD_OPTIONAL, Reason: "This field is able to be materialized"},
+			"bool": {Type: pm.Response_Validated_Constraint_LOCATION_RECOMMENDED, Reason: "The projection has a single scalar type"},
+			"flow_document": {Type: pm.Response_Validated_Constraint_LOCATION_REQUIRED, Reason: "The root document must be materialized"},
+			"int": {Type: pm.Response_Validated_Constraint_LOCATION_RECOMMENDED, Reason: "The projection has a single scalar type"},
+			"number": {Type: pm.Response_Validated_Constraint_LOCATION_RECOMMENDED, Reason: "The projection has a single scalar type"},
+			"object": {Type: pm.Response_Validated_Constraint_FIELD_OPTIONAL, Reason: "This field is able to be materialized"},
+			"string": {Type: pm.Response_Validated_Constraint_LOCATION_RECOMMENDED, Reason: "The projection has a single scalar type"},
+			"theKey": {Type: pm.Response_Validated_Constraint_LOCATION_REQUIRED, Reason: "All Locations that are part of the collections key are required"},
 		},
 		DeltaUpdates: false,
 		ResourcePath: model.Bindings[0].ResourcePath,
-	}, validateResp.Bindings[0])
+	}, validateResp.Validated.Bindings[0])

 	// Select some fields and Apply the materialization
 	var fields = pf.FieldSelection{
@@ -156,51 +150,47 @@ func TestSQLiteDriver(t *testing.T) {
 		Values: []string{"bool", "int", "string"}, // intentionally missing "number" field
 		Document: "flow_document",
 	}
-	var applyReq = pm.ApplyRequest{
+	var applyReq = pm.Request_Apply{
 		Materialization: &pf.MaterializationSpec{
-			Materialization: model.Materialization,
-			EndpointType: pf.EndpointType_SQLITE,
-			EndpointSpecJson: json.RawMessage(endpointJSON),
+			Name: model.Name,
+			ConnectorType: pf.MaterializationSpec_SQLITE,
+			ConfigJson: json.RawMessage(endpointJSON),
 			Bindings: []*pf.MaterializationSpec_Binding{
 				{
-					Collection: model.Bindings[0].Collection,
-					FieldSelection: fields,
-					ResourcePath: model.Bindings[0].ResourcePath,
-					ResourceSpecJson: model.Bindings[0].ResourceSpecJson,
-					DeltaUpdates: false,
-					Shuffle: model.Bindings[0].Shuffle,
+					Collection: model.Bindings[0].Collection,
+					FieldSelection: fields,
+					ResourcePath: model.Bindings[0].ResourcePath,
+					ResourceConfigJson: model.Bindings[0].ResourceConfigJson,
+					DeltaUpdates: false,
+					PartitionSelector: model.Bindings[0].PartitionSelector,
 				},
 			},
 			ShardTemplate: model.ShardTemplate,
 			RecoveryLogTemplate: model.RecoveryLogTemplate,
 		},
 		Version: "the-version",
-		DryRun: true,
 	}
+	require.NoError(t, transaction.Send(&pm.Request{Apply: &applyReq}))

-	applyResp, err := driver.ApplyUpsert(ctx, &applyReq)
-	require.NoError(t, err)
-	require.NotEmpty(t, applyResp.ActionDescription)
-
-	applyReq.DryRun = false
-	applyResp, err = driver.ApplyUpsert(ctx, &applyReq)
+	applyResp, err := transaction.Recv()
 	require.NoError(t, err)
-	require.NotEmpty(t, applyResp.ActionDescription)
+	require.NotEmpty(t, applyResp.Applied.ActionDescription)

 	// Now that we've applied, call Validate again to ensure the existing fields are accounted for
-	validateResp, err = driver.Validate(ctx, &validateReq)
+	require.NoError(t, transaction.Send(&pm.Request{Validate: &validateReq}))
+	validateResp, err = transaction.Recv()
 	require.NoError(t, err)

 	// Expect a constraint was returned for each projection.
 	require.Equal(t,
 		len(model.Bindings[0].Collection.Projections),
-		len(validateResp.Bindings[0].Constraints))
+		len(validateResp.Validated.Bindings[0].Constraints))
 	for _, field := range fields.AllFields() {
-		var actual = validateResp.Bindings[0].Constraints[field].Type
+		var actual = validateResp.Validated.Bindings[0].Constraints[field].Type
 		require.Equal(
 			t,
-			pm.Constraint_FIELD_REQUIRED,
+			pm.Response_Validated_Constraint_FIELD_REQUIRED,
 			actual,
 			"wrong constraint for field: %s, expected FIELD_REQUIRED, got %s",
 			field,
@@ -209,7 +199,8 @@ func TestSQLiteDriver(t *testing.T) {
 	}
 	// The "number" field should be forbidden because it was not included in the FieldSelection that
 	// was applied.
-	require.Equal(t, pm.Constraint_FIELD_FORBIDDEN, validateResp.Bindings[0].Constraints["number"].Type)
+	require.Equal(t, pm.Response_Validated_Constraint_FIELD_FORBIDDEN,
+		validateResp.Validated.Bindings[0].Constraints["number"].Type)

 	// Insert a fixture into the `flow_checkpoints` table which we'll fence
 	// and draw a checkpoint from, and then insert a more-specific checkpoint
@@ -218,29 +209,33 @@ func TestSQLiteDriver(t *testing.T) {
 		var db, err = sql.Open("sqlite3", endpointConfig.Path)
 		require.NoError(t, err)

+		var cp = &pf.Checkpoint{
+			Sources: map[pf.Journal]pc.Checkpoint_Source{"a/journal": {ReadThrough: 1}},
+		}
+		var cpBytes, _ = cp.Marshal()
+
 		_, err = db.Exec(`INSERT INTO flow_checkpoints_v1
 			(materialization, key_begin, key_end, fence, checkpoint)
 			VALUES (?, 0, ?, 5, ?)
 		;`,
-			applyReq.Materialization.Materialization,
+			applyReq.Materialization.Name,
 			math.MaxUint32,
-			base64.StdEncoding.EncodeToString([]byte("initial checkpoint fixture")),
+			base64.StdEncoding.EncodeToString(cpBytes),
 		)
 		require.NoError(t, err)
 		require.NoError(t, db.Close())
 	}

-	transaction, err := driver.Transactions(ctx)
-	require.NoError(t, err)
-
 	// Send open.
-	err = transaction.Send(&pm.TransactionRequest{
-		Open: &pm.TransactionRequest_Open{
-			Materialization: applyReq.Materialization,
-			Version: "the-version",
-			KeyBegin: 100,
-			KeyEnd: 200,
-			DriverCheckpointJson: nil,
+	err = transaction.Send(&pm.Request{
+		Open: &pm.Request_Open{
+			Materialization: applyReq.Materialization,
+			Version: "the-version",
+			Range: &pf.RangeSpec{
+				KeyBegin: 100,
+				KeyEnd: 200,
+			},
+			StateJson: nil,
 		},
 	})
 	require.NoError(t, err)
@@ -248,13 +243,11 @@ func TestSQLiteDriver(t *testing.T) {
 	// Receive Opened.
 	opened, err := transaction.Recv()
 	require.NoError(t, err)
-	require.Equal(t, &pm.TransactionResponse_Opened{
-		RuntimeCheckpoint: []byte("initial checkpoint fixture"),
-	}, opened.Opened)
+	require.Contains(t, opened.Opened.RuntimeCheckpoint.Sources, pb.Journal("a/journal"))

 	// Send & receive Acknowledge.
-	require.NoError(t, transaction.Send(&pm.TransactionRequest{
-		Acknowledge: &pm.TransactionRequest_Acknowledge{},
+	require.NoError(t, transaction.Send(&pm.Request{
+		Acknowledge: &pm.Request_Acknowledge{},
 	}))
 	acknowledged, err := transaction.Recv()
 	require.NoError(t, err)
@@ -263,20 +256,13 @@ func TestSQLiteDriver(t *testing.T) {
 	// Test Load with keys that don't exist yet
 	var key1 = tuple.Tuple{"key1Value"}
 	var key2 = tuple.Tuple{"key2Value"}
-	err = transaction.Send(&pm.TransactionRequest{
-		Load: newLoadReq(key1.Pack(), key2.Pack()),
-	})
-	require.NoError(t, err)
 	var key3 = tuple.Tuple{"key3Value"}
-	err = transaction.Send(&pm.TransactionRequest{
-		Load: newLoadReq(key3.Pack()),
-	})
-	require.NoError(t, err)
+	transaction.Send(&pm.Request{Load: &pm.Request_Load{KeyPacked: key1.Pack()}})
+	transaction.Send(&pm.Request{Load: &pm.Request_Load{KeyPacked: key2.Pack()}})
+	transaction.Send(&pm.Request{Load: &pm.Request_Load{KeyPacked: key3.Pack()}})

 	// Send Flush, which ends the Load phase.
-	err = transaction.Send(&pm.TransactionRequest{
-		Flush: &pm.TransactionRequest_Flush{},
-	})
+	err = transaction.Send(&pm.Request{Flush: &pm.Request_Flush{}})
 	require.NoError(t, err)

 	// Receive Flushed, which indicates that none of the documents exist
@@ -289,35 +275,31 @@ func TestSQLiteDriver(t *testing.T) {
 	var doc2 = `{ "theKey": "key2Value", "string": "bar", "bool": false, "int": 88, "number": 56.78 }`
 	var doc3 = `{ "theKey": "key3Value", "string": "baz", "bool": false, "int": 99, "number": 0 }`

-	var store1 = pm.TransactionRequest_Store{}
-	store1.DocsJson = store1.Arena.AddAll([]byte(doc1), []byte(doc2))
-	store1.PackedKeys = store1.Arena.AddAll(key1.Pack(), key2.Pack())
-	store1.PackedValues = store1.Arena.AddAll(
-		tuple.Tuple{"foo", true, 77}.Pack(),
-		tuple.Tuple{"bar", false, 88}.Pack(),
-	)
-	store1.Exists = []bool{false, false}
-	err = transaction.Send(&pm.TransactionRequest{
-		Store: &store1,
-	})
-	require.NoError(t, err)
-
-	var store2 = pm.TransactionRequest_Store{}
-	store2.DocsJson = store2.Arena.AddAll([]byte(doc3))
-	store2.PackedKeys = store2.Arena.AddAll(key3.Pack())
-	store2.PackedValues = store2.Arena.AddAll(
-		tuple.Tuple{"baz", false, 99}.Pack(),
-	)
-	store2.Exists = []bool{false}
-	err = transaction.Send(&pm.TransactionRequest{
-		Store: &store2,
-	})
-	require.NoError(t, err)
+	transaction.Send(&pm.Request{Store: &pm.Request_Store{
+		KeyPacked: key1.Pack(),
+		ValuesPacked: tuple.Tuple{"foo", true, 77}.Pack(),
+		DocJson: []byte(doc1),
+		Exists: false,
+	}})
+	transaction.Send(&pm.Request{Store: &pm.Request_Store{
+		KeyPacked: key2.Pack(),
+		ValuesPacked: tuple.Tuple{"bar", false, 88}.Pack(),
+		DocJson: []byte(doc2),
+		Exists: false,
+	}})
+	transaction.Send(&pm.Request{Store: &pm.Request_Store{
+		KeyPacked: key3.Pack(),
+		ValuesPacked: tuple.Tuple{"baz", false, 99}.Pack(),
+		DocJson: []byte(doc3),
+		Exists: false,
+	}})

 	// Send StartCommit and receive StartedCommit.
-	var checkpoint1 = []byte("first checkpoint value")
-	err = transaction.Send(&pm.TransactionRequest{
-		StartCommit: &pm.TransactionRequest_StartCommit{
+	var checkpoint1 = &pf.Checkpoint{
+		Sources: map[pf.Journal]pc.Checkpoint_Source{"a/journal": {ReadThrough: 111}},
+	}
+	err = transaction.Send(&pm.Request{
+		StartCommit: &pm.Request_StartCommit{
 			RuntimeCheckpoint: checkpoint1,
 		},
 	})
@@ -328,34 +310,25 @@ func TestSQLiteDriver(t *testing.T) {
 	require.NotNil(t, startedCommit.StartedCommit)

 	// Send & receive Acknowledge.
-	require.NoError(t, transaction.Send(&pm.TransactionRequest{
-		Acknowledge: &pm.TransactionRequest_Acknowledge{},
-	}))
+	require.NoError(t, transaction.Send(&pm.Request{Acknowledge: &pm.Request_Acknowledge{}}))
 	acknowledged, err = transaction.Recv()
 	require.NoError(t, err)
 	require.NotNil(t, acknowledged.Acknowledged, acknowledged)

 	// Next transaction. Send some loads.
-	err = transaction.Send(&pm.TransactionRequest{
-		Load: newLoadReq(key1.Pack(), key2.Pack(), key3.Pack()),
-	})
-	require.NoError(t, err)
+	transaction.Send(&pm.Request{Load: &pm.Request_Load{KeyPacked: key1.Pack()}})
+	transaction.Send(&pm.Request{Load: &pm.Request_Load{KeyPacked: key2.Pack()}})
+	transaction.Send(&pm.Request{Load: &pm.Request_Load{KeyPacked: key3.Pack()}})

 	// Send Flush to drain the load phase.
-	err = transaction.Send(&pm.TransactionRequest{
-		Flush: &pm.TransactionRequest_Flush{},
-	})
+	err = transaction.Send(&pm.Request{Flush: &pm.Request_Flush{}})
 	require.NoError(t, err)

 	// Receive Loaded response, which is expected to contain our 3 documents.
-	loaded, err := transaction.Recv()
-	require.NoError(t, err)
-	require.NotNil(t, loaded.Loaded)
-	require.Equal(t, 3, len(loaded.Loaded.DocsJson))
-
-	for i, expected := range []string{doc1, doc2, doc3} {
-		var actual = loaded.Loaded.Arena.Bytes(loaded.Loaded.DocsJson[i])
-		require.Equal(t, expected, string(actual))
+	for _, expected := range []string{doc1, doc2, doc3} {
+		loaded, err := transaction.Recv()
+		require.NoError(t, err)
+		require.Equal(t, expected, string(loaded.Loaded.DocJson))
 	}

 	// Receive Flushed
@@ -368,24 +341,25 @@ func TestSQLiteDriver(t *testing.T) {
 	var key4 = tuple.Tuple{"key4Value"}
 	var doc4 = `{ "theKey": "key4Value" }`

-	var storeReq = pm.TransactionRequest_Store{}
-	storeReq.Exists = []bool{true, false}
-	storeReq.PackedKeys = storeReq.Arena.AddAll(key1.Pack(), key4.Pack())
-	storeReq.PackedValues = storeReq.Arena.AddAll(
-		tuple.Tuple{"totally different", false, 33}.Pack(),
-		tuple.Tuple{nil, nil, nil}.Pack(),
-	)
-	storeReq.DocsJson = storeReq.Arena.AddAll([]byte(newDoc1), []byte(doc4))
-
-	err = transaction.Send(&pm.TransactionRequest{
-		Store: &storeReq,
-	})
-	require.NoError(t, err)
+	transaction.Send(&pm.Request{Store: &pm.Request_Store{
+		KeyPacked: key1.Pack(),
+		ValuesPacked: tuple.Tuple{"totally different", false, 33}.Pack(),
+		DocJson: []byte(newDoc1),
+		Exists: true,
+	}})
+	transaction.Send(&pm.Request{Store: &pm.Request_Store{
+		KeyPacked: key4.Pack(),
+		ValuesPacked: tuple.Tuple{nil, nil, nil}.Pack(),
+		DocJson: []byte(doc4),
+		Exists: false,
+	}})

 	// Commit transaction and assert we get a Committed.
-	var checkpoint2 = []byte("second checkpoint value")
-	err = transaction.Send(&pm.TransactionRequest{
-		StartCommit: &pm.TransactionRequest_StartCommit{
+	var checkpoint2 = &pf.Checkpoint{
+		Sources: map[pf.Journal]pc.Checkpoint_Source{"a/journal": {ReadThrough: 222}},
+	}
+	err = transaction.Send(&pm.Request{
+		StartCommit: &pm.Request_StartCommit{
 			RuntimeCheckpoint: checkpoint2,
 		},
 	})
@@ -396,34 +370,27 @@ func TestSQLiteDriver(t *testing.T) {
 	require.NotNil(t, startedCommit.StartedCommit)

 	// Send & receive Acknowledge.
-	require.NoError(t, transaction.Send(&pm.TransactionRequest{
-		Acknowledge: &pm.TransactionRequest_Acknowledge{},
+	require.NoError(t, transaction.Send(&pm.Request{
+		Acknowledge: &pm.Request_Acknowledge{},
 	}))
 	acknowledged, err = transaction.Recv()
 	require.NoError(t, err)
 	require.NotNil(t, acknowledged.Acknowledged, acknowledged)

 	// One more transaction just to verify the updated documents
-	err = transaction.Send(&pm.TransactionRequest{
-		Load: newLoadReq(key1.Pack(), key2.Pack(), key3.Pack(), key4.Pack()),
-	})
-	require.NoError(t, err)
+	for _, key := range []tuple.Tuple{key1, key2, key3, key4} {
+		transaction.Send(&pm.Request{Load: &pm.Request_Load{KeyPacked: key.Pack()}})
+	}

 	// Send Flush.
-	err = transaction.Send(&pm.TransactionRequest{
-		Flush: &pm.TransactionRequest_Flush{},
-	})
+	err = transaction.Send(&pm.Request{Flush: &pm.Request_Flush{}})
 	require.NoError(t, err)

 	// Receive loads, and expect it contains 4 documents.
-	loaded, err = transaction.Recv()
-	require.NoError(t, err)
-	require.NotNil(t, loaded.Loaded)
-	require.Equal(t, 4, len(loaded.Loaded.DocsJson))
-
-	for i, expected := range []string{newDoc1, doc2, doc3, doc4} {
-		var actual = loaded.Loaded.Arena.Bytes(loaded.Loaded.DocsJson[i])
-		require.Equal(t, expected, string(actual))
+	for _, expected := range []string{newDoc1, doc2, doc3, doc4} {
+		loaded, err := transaction.Recv()
+		require.NoError(t, err)
+		require.Equal(t, expected, string(loaded.Loaded.DocJson))
 	}

 	// Receive Flushed
@@ -432,9 +399,11 @@ func TestSQLiteDriver(t *testing.T) {
 	require.NotNil(t, flushed.Flushed, "unexpected message: %+v", flushed)

 	// Send and receive StartCommit / StartedCommit.
-	var checkpoint3 = []byte("third checkpoint value")
-	require.NoError(t, transaction.Send(&pm.TransactionRequest{
-		StartCommit: &pm.TransactionRequest_StartCommit{
+	var checkpoint3 = &pf.Checkpoint{
+		Sources: map[pf.Journal]pc.Checkpoint_Source{"a/journal": {ReadThrough: 333}},
+	}
+	require.NoError(t, transaction.Send(&pm.Request{
+		StartCommit: &pm.Request_StartCommit{
 			RuntimeCheckpoint: checkpoint3,
 		},
 	}))
@@ -443,8 +412,8 @@ func TestSQLiteDriver(t *testing.T) {
 	require.NotNil(t, startedCommit.StartedCommit)

 	// Send & receive a final Acknowledge.
-	require.NoError(t, transaction.Send(&pm.TransactionRequest{
-		Acknowledge: &pm.TransactionRequest_Acknowledge{},
+	require.NoError(t, transaction.Send(&pm.Request{
+		Acknowledge: &pm.Request_Acknowledge{},
 	}))
 	acknowledged, err = transaction.Recv()
 	require.NoError(t, err)
@@ -485,14 +454,6 @@ func TestSQLiteDriver(t *testing.T) {

 	// Precondition: table states exist.
 	verifyTableStatus(nil)
-
-	// Apply a delete of the materialization.
-	applyResp, err = driver.ApplyDelete(ctx, &applyReq)
-	require.NoError(t, err)
-	require.NotEmpty(t, applyResp.ActionDescription)
-
-	// Postcondition: all tables cleaned up.
-	verifyTableStatus(sql.ErrNoRows)
 }

 func dumpTables(t *testing.T, uri string, tables ...*sqlDriver.Table) string {
@@ -506,12 +467,3 @@ func dumpTables(t *testing.T, uri string, tables ...*sqlDriver.Table) string {

 	return out
 }
-
-func newLoadReq(keys ...[]byte) *pm.TransactionRequest_Load {
-	var arena pf.Arena
-	var packedKeys = arena.AddAll(keys...)
-	return &pm.TransactionRequest_Load{
-		Arena: arena,
-		PackedKeys: packedKeys,
-	}
-}
diff --git a/go/materialize/driver/sqlite/validate_test.go b/go/materialize/driver/sqlite/validate_test.go
index 2c0409df3d..f628ad3263 100644
--- a/go/materialize/driver/sqlite/validate_test.go
+++ b/go/materialize/driver/sqlite/validate_test.go
@@ -5,6 +5,7 @@ import (
 	"database/sql"
 	"fmt"
 	"path"
+	"strings"
 	"testing"

 	"github.com/bradleyjkemp/cupaloy"
@@ -22,29 +23,37 @@ func TestValidations(t *testing.T) {
 		FileRoot: "./testdata",
 		BuildAPI_Config: pf.BuildAPI_Config{
 			BuildId: "fixture",
-			Directory: t.TempDir(),
+			BuildDb: path.Join(t.TempDir(), "build.db"),
 			Source: "file:///validate.flow.yaml",
 			SourceType: pf.ContentType_CATALOG,
 		}}
 	require.NoError(t, bindings.BuildCatalog(args))

 	var collections []*pf.CollectionSpec
-	require.NoError(t, catalog.Extract(args.OutputPath(), func(db *sql.DB) (err error) {
+	require.NoError(t, catalog.Extract(args.BuildDb, func(db *sql.DB) (err error) {
 		collections, err = catalog.LoadAllCollections(db)
 		return err
 	}))

 	for _, spec := range collections {
+		if strings.HasPrefix(spec.Name.String(), "ops") {
+			continue
+		}
 		t.Run(
-			fmt.Sprintf("NewSQLProjections-%s", path.Base(spec.Collection.String())),
+			fmt.Sprintf("NewSQLProjections-%s", path.Base(spec.Name.String())),
 			func(t *testing.T) {
 				constraints := sqlDriver.ValidateNewSQLProjections(spec, false)
 				cupaloy.SnapshotT(t, constraints)
 			})
 	}

 	t.Run("MatchesExisting", func(t *testing.T) {
-		// Test body wants "weird-types/optionals", which orders as 1 alphabetically.
-		testMatchesExisting(t, collections[1])
+		for _, c := range collections {
+			if c.Name == "weird-types/optionals" {
+				testMatchesExisting(t, c)
+				return
+			}
+		}
+		panic("not found")
 	})
 }
@@ -79,15 +88,15 @@ func testMatchesExisting(t *testing.T, collection *pf.CollectionSpec) {
 	for _, field := range req {
 		var constraint, ok = constraints[field]
 		require.True(t, ok, "constraint must be present for field '%s'", field)
-		require.Equal(t, pm.Constraint_FIELD_REQUIRED, constraint.Type)
+		require.Equal(t, pm.Response_Validated_Constraint_FIELD_REQUIRED, constraint.Type)
 	}
 	var intConstraint, ok = constraints["int"]
 	require.True(t, ok, "missing constraint for 'int' field")
-	require.Equal(t, pm.Constraint_UNSATISFIABLE, intConstraint.Type)
+	require.Equal(t, pm.Response_Validated_Constraint_UNSATISFIABLE, intConstraint.Type)

 	numConstraint, ok := constraints["number"]
 	require.True(t, ok, "missing constraint for 'number' field")
-	require.Equal(t, pm.Constraint_FIELD_FORBIDDEN, numConstraint.Type)
+	require.Equal(t, pm.Response_Validated_Constraint_FIELD_FORBIDDEN, numConstraint.Type)

 	var proposedSpec = pf.MaterializationSpec_Binding{
 		Collection: proposed,
diff --git a/go/protocols/catalog/build_load.go b/go/protocols/catalog/build_load.go
index f8698d1fba..ae6cdb6d40 100644
--- a/go/protocols/catalog/build_load.go
+++ b/go/protocols/catalog/build_load.go
@@ -98,23 +98,6 @@ func LoadCapture(db *sql.DB, name string) (*pf.CaptureSpec, error) {
 	return out, loadOneSpec(db, `SELECT spec FROM built_captures WHERE capture = ?;`, out, name)
 }

-// LoadAllDerivations loads all derivations.
-func LoadAllDerivations(db *sql.DB) ([]*pf.DerivationSpec, error) {
-	var out []*pf.DerivationSpec
-	var err = loadSpecs(db,
-		`SELECT spec FROM built_derivations ORDER BY derivation ASC;`,
-		func() loadableSpec { return new(pf.DerivationSpec) },
-		func(l loadableSpec) { out = append(out, l.(*pf.DerivationSpec)) },
-	)
-	return out, err
-}
-
-// LoadDerivation by its name.
-func LoadDerivation(db *sql.DB, name string) (*pf.DerivationSpec, error) {
-	var out = new(pf.DerivationSpec)
-	return out, loadOneSpec(db, `SELECT spec FROM built_derivations WHERE derivation = ?;`, out, name)
-}
-
 // LoadAllMaterializations loads all materializations.
 func LoadAllMaterializations(db *sql.DB) ([]*pf.MaterializationSpec, error) {
 	var out []*pf.MaterializationSpec
@@ -145,63 +128,6 @@ func LoadAllTests(db *sql.DB) ([]*pf.TestSpec, error) {
 	return out, err
 }

-// LoadNPMPackage loads the NPM package of the catalog.
-func LoadNPMPackage(db *sql.DB) ([]byte, error) {
-	var out []byte
-	var err = db.QueryRow(
-		`SELECT content FROM resources WHERE content_type = '"NPM_PACKAGE"';`,
-	).Scan(&out)
-
-	if err != nil {
-		return nil, fmt.Errorf("loading NPM package: %w", err)
-	}
-	return out, nil
-}
-
-// SchemaLocation is static inference of a location within a schema document.
-type SchemaLocation struct {
-	// URL of the schema which is inferred, inclusive of any fragment pointer.
-	Schema string
-	// A location within a document verified by this schema,
-	// relative to the schema root.
-	Location string
-	// Inference at this schema location.
-	Spec pf.Inference
-}
-
-// LoadAllInferences loads all inferences.
-func LoadAllInferences(db *sql.DB) ([]SchemaLocation, error) {
-	var out []SchemaLocation
-	var err = loadRows(db,
-		`SELECT schema, location, spec FROM inferences ORDER BY schema, location ASC;`,
-		func() []interface{} { return []interface{}{new(string), new(string), new([]byte)} },
-		func(l []interface{}) {
-			var loc = SchemaLocation{
-				Schema: *l[0].(*string),
-				Location: *l[1].(*string),
-			}
-			if err := loc.Spec.Unmarshal(*l[2].(*[]byte)); err != nil {
-				panic(err) // TODO plumb this better.
-			}
-			out = append(out, loc)
-		},
-	)
-	return out, err
-}
-
-// LoadSchemaBundle loads the bundle of schema documents.
-// DEPRECATED. This is being kept as a short-term migration capability
-// and can be removed after ~May 15th 2022.
-func LoadSchemaBundle(db *sql.DB) (map[string]string, error) {
-	var out = make(map[string]string)
-	var err = loadRows(db,
-		`SELECT schema, dom FROM schema_docs;`,
-		func() []interface{} { return []interface{}{new(string), new(string)} },
-		func(l []interface{}) { out[*l[0].(*string)] = *l[1].(*string) },
-	)
-	return out, err
-}
-
 type loadableSpec interface {
 	Unmarshal([]byte) error
 	Validate() error
diff --git a/go/shuffle/.snapshots/TestSubscriberResponseStaging b/go/shuffle/.snapshots/TestSubscriberResponseStaging
index df30c9b0dc..1f5aacc489 100644
--- a/go/shuffle/.snapshots/TestSubscriberResponseStaging
+++ b/go/shuffle/.snapshots/TestSubscriberResponseStaging
@@ -2,10 +2,10 @@ terminal_error: "an error"
 read_through: 1000
 write_head: 2000
 arena: "barbarACKACK"
-docs_json: <
+docs: <
   end: 3
 >
-docs_json: <
+docs: <
   begin: 6
   end: 9
 >
@@ -14,11 +14,11 @@ offsets: 201
 offsets: 400
 offsets: 401
 uuid_parts: <
-  producer_and_flags: 1
+  node: 1
   clock: 160000
 >
 uuid_parts: <
-  producer_and_flags: 2
+  node: 2
   clock: 160032
 >
 packed_key: <
@@ -33,10 +33,10 @@ terminal_error: "an error"
 read_through: 1000
 write_head: 2000
 arena: "qibqibACKACK"
-docs_json: <
+docs: <
   end: 3
 >
-docs_json: <
+docs: <
   begin: 6
   end: 9
 >
@@ -45,11 +45,11 @@ offsets: 301
 offsets: 400
 offsets: 401
 uuid_parts: <
-  producer_and_flags: 1
+  node: 1
   clock: 160016
 >
 uuid_parts: <
-  producer_and_flags: 2
+  node: 2
   clock: 160032
 >
 packed_key: <
@@ -64,10 +64,10 @@ terminal_error: "an error"
 read_through: 1000
 write_head: 2000
 arena: "ACKACKfoofoo"
-docs_json: <
+docs: <
   end: 3
 >
-docs_json: <
+docs: <
   begin: 6
   end: 9
 >
@@ -76,11 +76,11 @@ offsets: 401
 offsets: 500
 offsets: 501
 uuid_parts: <
-  producer_and_flags: 2
+  node: 2
   clock: 160032
 >
 uuid_parts: <
-  producer_and_flags: 1
+  node: 1
   clock: 160048
 >
 packed_key: <
diff --git a/go/shuffle/api_test.go b/go/shuffle/api_test.go
index e38aaaa3a1..8a542dff6c 100644
--- a/go/shuffle/api_test.go
+++ b/go/shuffle/api_test.go
@@ -6,15 +6,16 @@ import (
 	"encoding/json"
 	"fmt"
 	"io"
+	"path"
 	"testing"

 	"github.com/estuary/flow/go/bindings"
 	"github.com/estuary/flow/go/flow"
 	"github.com/estuary/flow/go/labels"
-	"github.com/estuary/flow/go/ops"
 	"github.com/estuary/flow/go/protocols/catalog"
 	"github.com/estuary/flow/go/protocols/fdb/tuple"
 	pf "github.com/estuary/flow/go/protocols/flow"
+	"github.com/estuary/flow/go/protocols/ops"
 	"github.com/stretchr/testify/require"
 	"go.gazette.dev/core/broker/client"
 	pb "go.gazette.dev/core/broker/protocol"
@@ -30,28 +31,32 @@ import (
 )

 func TestAPIIntegrationWithFixtures(t *testing.T) {
+	var dir = t.TempDir()
 	var args = bindings.BuildArgs{
 		Context: context.Background(),
 		FileRoot: "./testdata",
 		BuildAPI_Config: pf.BuildAPI_Config{
 			BuildId: "a-build-id",
-			Directory: t.TempDir(),
+			BuildDb: path.Join(dir, "a-build-db"),
 			Source: "file:///ab.flow.yaml",
 			SourceType: pf.ContentType_CATALOG,
 		}}
 	require.NoError(t, bindings.BuildCatalog(args))

-	var derivation *pf.DerivationSpec
-	require.NoError(t, catalog.Extract(args.OutputPath(), func(db *sql.DB) (err error) {
-		derivation, err = catalog.LoadDerivation(db, "a/derivation")
+	var derivation *pf.CollectionSpec
+	require.NoError(t, catalog.Extract(args.BuildDb, func(db *sql.DB) (err error) {
+		derivation, err = catalog.LoadCollection(db, "a/derivation")
 		return err
 	}))

+	// TODO(johnny): update the fixture to make it validate as readOnly (SQL SELECT).
+	derivation.Derivation.Transforms[0].ReadOnly = true
+
 	var backgroundCtx = pb.WithDispatchDefault(context.Background())
 	var etcd = etcdtest.TestClient()
 	defer etcdtest.Cleanup()

-	var builds, err = flow.NewBuildService("file://" + args.Directory + "/")
+	var builds, err = flow.NewBuildService("file://" + dir + "/")
 	require.NoError(t, err)
 	var bk = brokertest.NewBroker(t, etcd, "local", "broker")
 	var journalSpec = brokertest.Journal(pb.JournalSpec{
@@ -80,7 +85,7 @@ func TestAPIIntegrationWithFixtures(t *testing.T) {
 	var shuffle = pf.JournalShuffle{
 		Journal: "a/journal",
 		Coordinator: "the-coordinator",
-		Shuffle: &derivation.Transforms[0].Shuffle,
+		Shuffle: derivation.TaskShuffles()[0],
 		BuildId: "a-build-id",
 	}
@@ -180,7 +185,7 @@ func TestAPIIntegrationWithFixtures(t *testing.T) {
 		AA string
 		B string
 	}
-	require.NoError(t, json.Unmarshal(msg.Arena.Bytes(msg.DocsJson[msg.Index]), &record))
+	require.NoError(t, json.Unmarshal(msg.Arena.Bytes(msg.Docs[msg.Index]), &record))

 	require.Equal(t, 1, record.A)
 	require.Equal(t, "1", record.AA)
@@ -196,7 +201,7 @@ func TestAPIIntegrationWithFixtures(t *testing.T) {
 	// Interlude: Another read, this time with an invalid schema.
 	var badShuffle = shuffle
-	badShuffle.ValidateSchemaJson = `{"invalid":"keyword"}`
+	badShuffle.ValidateSchema = `{"invalid":"keyword"}`

 	var badRead = &read{
 		publisher: localPublisher,
@@ -227,7 +232,7 @@ func TestAPIIntegrationWithFixtures(t *testing.T) {
 	out, err = tailStream.Recv()
 	require.NoError(t, err)
 	require.Len(t, out.UuidParts, 1)
-	require.True(t, message.Flags(out.UuidParts[0].ProducerAndFlags)&message.Flag_ACK_TXN != 0)
+	require.True(t, message.Flags(out.UuidParts[0].Node)&message.Flag_ACK_TXN != 0)

 	// Cancel the server-side API context, then do a GracefulStop() (*not* a BoundedGracefulStop)
 	// of the server. This will hang if the API doesn't properly unwind our in-flight tailing RPC.
@@ -245,7 +250,7 @@ func TestAPIIntegrationWithFixtures(t *testing.T) {
 var localPublisher = ops.NewLocalPublisher(
 	labels.ShardLabeling{
 		Build: "the-build",
-		LogLevel: pf.LogLevel_debug,
+		LogLevel: ops.Log_debug,
 		Range: pf.RangeSpec{
 			KeyBegin: 0x00001111,
 			KeyEnd: 0x11110000,
@@ -253,6 +258,6 @@ var localPublisher = ops.NewLocalPublisher(
 			RClockEnd: 0x22220000,
 		},
 		TaskName: "some-tenant/task/name",
-		TaskType: labels.TaskTypeDerivation,
+		TaskType: ops.TaskType_derivation,
 	},
 )
diff --git a/go/shuffle/read.go b/go/shuffle/read.go
index ca7e4209a4..6f9580fff9 100644
--- a/go/shuffle/read.go
+++ b/go/shuffle/read.go
@@ -10,8 +10,8 @@ import (

 	"github.com/estuary/flow/go/flow"
 	"github.com/estuary/flow/go/labels"
-	"github.com/estuary/flow/go/ops"
 	pf "github.com/estuary/flow/go/protocols/flow"
+	"github.com/estuary/flow/go/protocols/ops"
 	"go.gazette.dev/core/allocator"
 	pb "go.gazette.dev/core/broker/protocol"
 	"go.gazette.dev/core/consumer"
@@ -84,7 +84,7 @@ func NewReadBuilder(
 func (rb *ReadBuilder) ReadThrough(offsets pb.Offsets) (pb.Offsets, error) {
 	var out = make(pb.Offsets, len(offsets))
 	var err = walkReads(rb.shardID, rb.members(), rb.journals, rb.shuffles,
-		func(_ pf.RangeSpec, spec pb.JournalSpec, _ *pf.Shuffle, _ pc.ShardID) {
+		func(_ pf.RangeSpec, spec pb.JournalSpec, shuffleIndex int, _ pc.ShardID) {
 			if offset := offsets[spec.Name]; offset != 0 {
 				// Prefer an offset that exactly matches our journal + metadata extension.
 				out[spec.Name] = offset
@@ -120,7 +120,7 @@ type read struct {
 func (rb *ReadBuilder) buildReplayRead(journal pb.Journal, begin, end pb.Offset) (*read, error) {
 	var out *read
 	var err = walkReads(rb.shardID, rb.members(), rb.journals, rb.shuffles,
-		func(range_ pf.RangeSpec, spec pb.JournalSpec, shuffle *pf.Shuffle, coordinator pc.ShardID) {
+		func(range_ pf.RangeSpec, spec pb.JournalSpec, shuffleIndex int, coordinator pc.ShardID) {
 			if spec.Name != journal {
 				return
 			}
@@ -128,7 +128,7 @@ func (rb *ReadBuilder) buildReplayRead(journal pb.Journal, begin, end pb.Offset)
 			var journalShuffle = pf.JournalShuffle{
 				Journal: spec.Name,
 				Coordinator: coordinator,
-				Shuffle: shuffle,
+				Shuffle: rb.shuffles[shuffleIndex],
 				Replay: true,
 				BuildId: rb.buildID,
 			}
@@ -141,7 +141,7 @@ func (rb *ReadBuilder) buildReplayRead(journal pb.Journal, begin, end pb.Offset)
 					Offset: begin,
 					EndOffset: end,
 				},
-				resp: pf.IndexedShuffleResponse{Shuffle: shuffle},
+				resp: pf.IndexedShuffleResponse{ShuffleIndex: shuffleIndex},
 				readDelay: 0, // Not used during replay.
 			}
 		})
@@ -185,12 +185,12 @@ func (rb *ReadBuilder) buildReads(
 	}

 	err = walkReads(rb.shardID, rb.members(), rb.journals, rb.shuffles,
-		func(range_ pf.RangeSpec, spec pb.JournalSpec, shuffle *pf.Shuffle, coordinator pc.ShardID) {
+		func(range_ pf.RangeSpec, spec pb.JournalSpec, shuffleIndex int, coordinator pc.ShardID) {
 			// Build the configuration under which we'll read.
 			var journalShuffle = pf.JournalShuffle{
 				Journal: spec.Name,
 				Coordinator: coordinator,
-				Shuffle: shuffle,
+				Shuffle: rb.shuffles[shuffleIndex],
 				Replay: false,
 				BuildId: rb.buildID,
 			}
@@ -202,7 +202,7 @@ func (rb *ReadBuilder) buildReads(
 			if r.req.Shuffle.Equal(&journalShuffle) {
 				delete(drain, spec.Name)
 			} else {
-				r.log(pf.LogLevel_debug,
+				r.log(ops.Log_debug,
 					"draining read because its shuffle has changed",
 					"next", map[string]interface{}{
 						"build": journalShuffle.BuildId,
@@ -216,8 +216,8 @@ func (rb *ReadBuilder) buildReads(
 			}

 			// A *read of this journal doesn't exist. Start one.
-			var readDelay = message.NewClock(time.Unix(int64(shuffle.ReadDelaySeconds), 0)) -
-				message.NewClock(time.Unix(0, 0))
+			var readDelaySeconds = int64(rb.shuffles[shuffleIndex].ReadDelaySeconds)
+			var readDelay = message.NewClock(time.Unix(readDelaySeconds, 0)) - message.NewClock(time.Unix(0, 0))

 			added[spec.Name] = &read{
 				publisher: rb.publisher,
@@ -227,7 +227,7 @@ func (rb *ReadBuilder) buildReads(
 					Range: range_,
 					Offset: offsets[spec.Name],
 				},
-				resp: pf.IndexedShuffleResponse{Shuffle: shuffle},
+				resp: pf.IndexedShuffleResponse{ShuffleIndex: shuffleIndex},
 				readDelay: readDelay,
 			}
 		})
@@ -248,7 +248,7 @@ func (r *read) start(
 	case <-time.After(backoff(attempt)):
 	}

-	r.log(pf.LogLevel_debug, "started shuffle read", "attempt", attempt)
+	r.log(ops.Log_debug, "started shuffle read", "attempt", attempt)

 	ctx = pprof.WithLabels(ctx, pprof.Labels(
 		"build", r.req.Shuffle.BuildId,
@@ -354,7 +354,7 @@ func (r *read) sendReadResult(resp *pf.ShuffleResponse, err error, wakeCh chan<-
 	var queue, cap = len(r.ch), cap(r.ch)
 	if queue == cap {
-		r.log(pf.LogLevel_warn,
+		r.log(ops.Log_warn,
 			"cancelling shuffle read due to full channel timeout",
 			"queue", queue,
 			"cap", cap,
@@ -380,7 +380,7 @@ func (r *read) sendReadResult(resp *pf.ShuffleResponse, err error, wakeCh chan<-
 		case <-timer.C:
 			if queue > 13 { // Log values > 8s.
- r.log(pf.LogLevel_debug, + r.log(ops.Log_debug, "backpressure timer elapsed on a slow shuffle read", "queue", queue, "backoff", dur.Seconds(), @@ -412,7 +412,7 @@ func (r *read) sendReadResult(resp *pf.ShuffleResponse, err error, wakeCh chan<- // It's only used for replay reads and easier testing; // ongoing reads poll the read channel directly. func (r *read) next() (message.Envelope, error) { - for r.resp.Index == len(r.resp.DocsJson) { + for r.resp.Index == len(r.resp.Docs) { // We must receive from the channel. var rr, ok = <-r.ch if err := r.onRead(rr, ok); err == nil { @@ -456,7 +456,7 @@ func (r *read) dequeue() message.Envelope { return env } -func (r *read) log(lvl pf.LogLevel, message string, fields ...interface{}) { +func (r *read) log(lvl ops.Log_Level, message string, fields ...interface{}) { if lvl > r.publisher.Labels().LogLevel { return } @@ -509,7 +509,7 @@ func (h *readHeap) Pop() interface{} { } func walkReads(id pc.ShardID, shardSpecs []*pc.ShardSpec, allJournals flow.Journals, shuffles []*pf.Shuffle, - cb func(_ pf.RangeSpec, _ pb.JournalSpec, _ *pf.Shuffle, coordinator pc.ShardID)) error { + cb func(_ pf.RangeSpec, _ pb.JournalSpec, shuffleIndex int, coordinator pc.ShardID)) error { var members, err = newShuffleMembers(shardSpecs) if err != nil { @@ -525,7 +525,7 @@ func walkReads(id pc.ShardID, shardSpecs []*pc.ShardSpec, allJournals flow.Journ allJournals.Mu.RLock() defer allJournals.Mu.RUnlock() - for _, shuffle := range shuffles { + for shuffleIndex, shuffle := range shuffles { var prefix = allocator.ItemKey(allJournals.KeySpace, shuffle.SourceCollection.String()) + "/" var sources = allJournals.Prefixed(prefix) @@ -595,7 +595,7 @@ func walkReads(id pc.ShardID, shardSpecs []*pc.ShardSpec, allJournals flow.Journ copied.Name = pb.Journal(fmt.Sprintf("%s;%s", source.Name.String(), shuffle.GroupName)) var m = pickHRW(hrwHash(copied.Name.String()), members, start, stop) - cb(members[index].range_, copied, shuffle, members[m].spec.Id) + cb(members[index].range_, copied, shuffleIndex, members[m].spec.Id) } } return nil diff --git a/go/shuffle/read_test.go b/go/shuffle/read_test.go index 00cd8a89ff..10432955f3 100644 --- a/go/shuffle/read_test.go +++ b/go/shuffle/read_test.go @@ -81,7 +81,7 @@ func TestReadBuilding(t *testing.T) { Range: ranges, Offset: 1122, }, - resp: pf.IndexedShuffleResponse{Shuffle: shuffles[0]}, + resp: pf.IndexedShuffleResponse{ShuffleIndex: 0}, readDelay: 60e7 << 4, // 60 seconds as a message.Clock. }, }, added) @@ -114,7 +114,7 @@ func TestReadBuilding(t *testing.T) { Offset: 1000, EndOffset: 2000, }, - resp: pf.IndexedShuffleResponse{Shuffle: shuffles[0]}, + resp: pf.IndexedShuffleResponse{ShuffleIndex: 0}, readDelay: 0, }, r) @@ -410,9 +410,9 @@ func TestWalkingReads(t *testing.T) { // No additional reads for shard index == 2. var err = walkReads(shards[index].Id, shards, journals, shuffles, - func(_ pf.RangeSpec, spec pb.JournalSpec, shuffle *pf.Shuffle, coordinator pc.ShardID) { + func(_ pf.RangeSpec, spec pb.JournalSpec, shuffleIndex int, coordinator pc.ShardID) { require.Equal(t, expect[0].journal, spec.Name.String()) - require.Equal(t, expect[0].source, shuffle.SourceCollection.String()) + require.Equal(t, expect[0].source, shuffles[shuffleIndex].SourceCollection.String()) require.Equal(t, expect[0].coordinator, coordinator) expect = expect[1:] }) @@ -424,17 +424,17 @@ func TestWalkingReads(t *testing.T) { // portion of the key range is not covered by any shard. 
// This results in an error when walking with shuffle "bar-one" which uses the source key. var err = walkReads(shards[0].Id, shards[0:2], journals, shuffles[:1], - func(_ pf.RangeSpec, _ pb.JournalSpec, _ *pf.Shuffle, _ pc.ShardID) {}) + func(_ pf.RangeSpec, _ pb.JournalSpec, _ int, _ pc.ShardID) {}) require.EqualError(t, err, "none of 2 shards overlap the key-range of journal foo/bar=1/baz=abc/part=00, aaaaaaaa-ffffffff") // But is not an error with shuffle "baz-def", which *doesn't* use the source key. err = walkReads(shards[0].Id, shards[0:2], journals, shuffles[1:2], - func(_ pf.RangeSpec, _ pb.JournalSpec, _ *pf.Shuffle, _ pc.ShardID) {}) + func(_ pf.RangeSpec, _ pb.JournalSpec, _ int, _ pc.ShardID) {}) require.NoError(t, err) // Case: shard doesn't exist. err = walkReads("shard/deleted", shards, journals, shuffles, - func(_ pf.RangeSpec, _ pb.JournalSpec, _ *pf.Shuffle, _ pc.ShardID) {}) + func(_ pf.RangeSpec, _ pb.JournalSpec, _ int, _ pc.ShardID) {}) require.EqualError(t, err, "shard shard/deleted not found among shuffle members") } @@ -511,7 +511,7 @@ func TestShuffleMemberOrdering(t *testing.T) { "shard shard/3: expected estuary.dev/key-begin to be a 4-byte, hex encoded integer; got whoops") } -func buildReadTestJournalsAndTransforms() (flow.Journals, []*pc.ShardSpec, *pf.DerivationSpec) { +func buildReadTestJournalsAndTransforms() (flow.Journals, []*pc.ShardSpec, *pf.CollectionSpec) { var journals = flow.Journals{ KeySpace: &keyspace.KeySpace{Root: "/the/root"}} @@ -565,52 +565,49 @@ func buildReadTestJournalsAndTransforms() (flow.Journals, []*pc.ShardSpec, *pf.D } // Derivation fixture reading partitions of "foo" into derivation "der". - var task = &pf.DerivationSpec{ - Transforms: []pf.TransformSpec{ - { - Transform: "bar-one", - Shuffle: pf.Shuffle{ - GroupName: "transform/der/bar-one", - UsesSourceKey: true, + var task = &pf.CollectionSpec{ + Name: "der", + Derivation: &pf.CollectionSpec_Derivation{ + Transforms: []pf.CollectionSpec_Derivation_Transform{ + { + Name: "bar-one", ReadDelaySeconds: 60, - SourceCollection: "foo", - SourcePartitions: pb.LabelSelector{ + Collection: pf.CollectionSpec{Name: "foo"}, + PartitionSelector: pb.LabelSelector{ Include: pb.MustLabelSet(labels.FieldPrefix+"bar", "1"), }, + JournalReadSuffix: "transform/der/bar-one", }, - Derivation: "der", - }, - { - Transform: "baz-def", - Shuffle: pf.Shuffle{ - GroupName: "transform/der/baz-def", - UsesSourceKey: false, - SourceCollection: "foo", - SourcePartitions: pb.LabelSelector{ + { + Name: "baz-def", + ShuffleKey: []string{"/key"}, + Collection: pf.CollectionSpec{Name: "foo"}, + PartitionSelector: pb.LabelSelector{ Include: pb.MustLabelSet(labels.FieldPrefix+"baz", "def"), }, + JournalReadSuffix: "transform/der/baz-def", }, - Derivation: "der", - }, - { - Transform: "unmatched", - Shuffle: pf.Shuffle{ - GroupName: "transform/der/unmatched", - SourceCollection: "foo", - SourcePartitions: pb.LabelSelector{ + { + Name: "unmatched", + ShuffleKey: []string{"/key"}, + Collection: pf.CollectionSpec{Name: "foo"}, + PartitionSelector: pb.LabelSelector{ Include: pb.MustLabelSet(labels.FieldPrefix+"baz", "other-value"), }, + JournalReadSuffix: "transform/der/unmatched", }, - Derivation: "der", - }, - { - Transform: "partitions-cover", - Shuffle: pf.Shuffle{ - GroupName: "transform/der/partitions-cover", - SourceCollection: "foo", - ShuffleKeyPartitionFields: []string{"baz", "bar"}, + { + Name: "partitions-cover", + ShuffleKey: []string{"/baz", "/bar"}, + Collection: pf.CollectionSpec{ + Name: "foo", + 
Projections: []pf.Projection{ + {Ptr: "/bar", Field: "bar", IsPartitionKey: true}, + {Ptr: "/baz", Field: "baz", IsPartitionKey: true}, + }, + }, + JournalReadSuffix: "transform/der/partitions-cover", }, - Derivation: "der", }, }, } diff --git a/go/shuffle/reader.go b/go/shuffle/reader.go index f08139e5a6..12841b3780 100644 --- a/go/shuffle/reader.go +++ b/go/shuffle/reader.go @@ -9,6 +9,7 @@ import ( "github.com/estuary/flow/go/flow" pf "github.com/estuary/flow/go/protocols/flow" + "github.com/estuary/flow/go/protocols/ops" "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/client_golang/prometheus/promauto" pb "go.gazette.dev/core/broker/protocol" @@ -144,7 +145,7 @@ func StartReplayRead(ctx context.Context, rb *ReadBuilder, journal pb.Journal, b // Other errors indicate a broken stream, but may be retried. // Stream is broken, but may be retried. - r.log(pf.LogLevel_warn, + r.log(ops.Log_warn, "shuffled replay read failed (will retry)", "error", err, "attempt", attempt, @@ -189,13 +190,13 @@ func (g *governor) next(ctx context.Context) (message.Envelope, error) { g.gated = append(g.gated, r) g.setPollState(r, pollStateGated) - r.log(pf.LogLevel_debug, "gated documents of journal", "until", readTime) + r.log(ops.Log_debug, "gated documents of journal", "until", readTime) continue } var env = r.dequeue() - if r.resp.Index != len(r.resp.DocsJson) { + if r.resp.Index != len(r.resp.Docs) { // Next document is available without polling. heap.Push(&g.queued, r) } else { @@ -270,9 +271,9 @@ func (g *governor) poll(ctx context.Context) error { // shard assignments change and the read is restarted against // an new coordinator. Other errors aren't as typical. if err != context.Canceled { - r.log(pf.LogLevel_warn, "shuffled read failed (will retry)", "error", err) + r.log(ops.Log_warn, "shuffled read failed (will retry)", "error", err) } else { - r.log(pf.LogLevel_debug, "shuffled read has drained") + r.log(ops.Log_debug, "shuffled read has drained") } // Clear tracking state for this drained read. @@ -287,11 +288,11 @@ func (g *governor) poll(ctx context.Context) error { return g.onConverge(ctx) } else if r.resp.TerminalError != "" { return fmt.Errorf(r.resp.TerminalError) - } else if len(r.resp.DocsJson) == 0 && r.resp.Tailing() { + } else if len(r.resp.Docs) == 0 && r.resp.Tailing() { // This is an empty read which informed us the reader is now tailing. // Leave it in pending, but return to attempt another read of the channel. return errPollAgain - } else if len(r.resp.DocsJson) == 0 { + } else if len(r.resp.Docs) == 0 { return fmt.Errorf("unexpected non-tailing empty ShuffleResponse") } else { // Successful read. Queue it for consumption. 
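
Aside on the DocsJson → Docs rename seen throughout: a ShuffleResponse still carries its documents as pf.Slice windows into a shared Arena rather than as independent byte slices. Below is a minimal, self-contained sketch of that pattern, assuming only the pf.Arena/pf.Slice helpers already used in this change (Arena.Add, Arena.Bytes); the `resp` fixture is illustrative, not a real response:

    package main

    import (
        "fmt"

        pf "github.com/estuary/flow/go/protocols/flow"
    )

    func main() {
        // Writing side (as in ring.readDocuments): append raw document
        // bytes into the Arena, recording one pf.Slice per document.
        var resp pf.ShuffleResponse
        resp.Docs = append(resp.Docs, resp.Arena.Add([]byte(`{"a":1}`)))
        resp.Docs = append(resp.Docs, resp.Arena.Add([]byte(`{"a":2}`)))

        // Reading side (as in read.next or a consumer's ConsumeMessage):
        // each Docs[i] is a {Begin, End} window into the shared Arena.
        for i := range resp.Docs {
            fmt.Printf("%s\n", resp.Arena.Bytes(resp.Docs[i]))
        }
    }

The arena layout keeps the hot read path to a single growing buffer instead of one allocation per document, which is why both the old DocsJson field and the renamed Docs field hold offsets rather than bytes.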
@@ -334,7 +335,7 @@ func (g *governor) onTick() error { for _, r := range g.gated { heap.Push(&g.queued, r) g.setPollState(r, pollStateReady) - r.log(pf.LogLevel_debug, "un-gated documents of journal", "now", g.wallTime) + r.log(ops.Log_debug, "un-gated documents of journal", "now", g.wallTime) } g.gated = g.gated[:0] @@ -365,7 +366,7 @@ func (g *governor) onConverge(ctx context.Context) error { } for _, r := range drain { - r.log(pf.LogLevel_debug, "cancelled shuffled read marked for draining") + r.log(ops.Log_debug, "cancelled shuffled read marked for draining") r.cancel() } diff --git a/go/shuffle/reader_test.go b/go/shuffle/reader_test.go index 21fa5f584e..fb9013a6db 100644 --- a/go/shuffle/reader_test.go +++ b/go/shuffle/reader_test.go @@ -7,6 +7,7 @@ import ( "fmt" "math" "math/rand" + "path" "testing" "time" @@ -63,20 +64,21 @@ func TestStuffedMessageChannel(t *testing.T) { } func TestConsumerIntegration(t *testing.T) { + var dir = t.TempDir() var args = bindings.BuildArgs{ Context: context.Background(), FileRoot: "./testdata", BuildAPI_Config: pf.BuildAPI_Config{ BuildId: "a-build-id", - Directory: t.TempDir(), + BuildDb: path.Join(dir, "a-build-id"), Source: "file:///ab.flow.yaml", SourceType: pf.ContentType_CATALOG, }} require.NoError(t, bindings.BuildCatalog(args)) - var derivation *pf.DerivationSpec - require.NoError(t, catalog.Extract(args.OutputPath(), func(db *sql.DB) (err error) { - derivation, err = catalog.LoadDerivation(db, "a/derivation") + var derivation *pf.CollectionSpec + require.NoError(t, catalog.Extract(args.BuildDb, func(db *sql.DB) (err error) { + derivation, err = catalog.LoadCollection(db, "a/derivation") return err })) @@ -86,7 +88,7 @@ func TestConsumerIntegration(t *testing.T) { var etcd = etcdtest.TestClient() defer etcdtest.Cleanup() - var builds, err = flow.NewBuildService("file://" + args.Directory + "/") + var builds, err = flow.NewBuildService("file://" + dir + "/") require.NoError(t, err) // Fixtures which parameterize the test: var ( @@ -351,8 +353,8 @@ func (a testApp) ConsumeMessage(shard consumer.Shard, store consumer.Store, env var key = msg.Arena.Bytes(msg.PackedKey[msg.Index]) state[hex.EncodeToString(key)]++ - if msg.Shuffle.GroupName != a.shuffles[0].GroupName { - return fmt.Errorf("expected Shuffle fixture to be passed-through") + if msg.ShuffleIndex != 0 { + return fmt.Errorf("expected ShuffleIndex to be zero (only one shuffle)") } return nil } diff --git a/go/shuffle/ring.go b/go/shuffle/ring.go index b996dc645c..9105edf050 100644 --- a/go/shuffle/ring.go +++ b/go/shuffle/ring.go @@ -11,8 +11,8 @@ import ( "github.com/estuary/flow/go/bindings" "github.com/estuary/flow/go/flow" - "github.com/estuary/flow/go/ops" pf "github.com/estuary/flow/go/protocols/flow" + "github.com/estuary/flow/go/protocols/ops" "github.com/pkg/errors" "go.gazette.dev/core/broker/client" pb "go.gazette.dev/core/broker/protocol" @@ -125,7 +125,7 @@ func (r *ring) onSubscribe(sub subscriber) { r.subscribers.prune() var rr = r.subscribers.add(sub) - r.log(pf.LogLevel_debug, + r.log(ops.Log_debug, "added shuffle ring subscriber", "endOffset", sub.EndOffset, "offset", sub.Offset, @@ -146,7 +146,7 @@ func (r *ring) onSubscribe(sub subscriber) { go r.readDocuments(readCh, *rr) if rr.EndOffset != 0 { - r.log(pf.LogLevel_debug, + r.log(ops.Log_debug, "started a catch-up journal read for new subscriber", "endOffset", rr.EndOffset, "offset", rr.Offset, @@ -162,7 +162,7 @@ func (r *ring) onRead(staged *pf.ShuffleResponse, ok bool, ex *bindings.Extracto r.readChans = 
r.readChans[:len(r.readChans)-1] if len(r.readChans) != 0 { - r.log(pf.LogLevel_debug, + r.log(ops.Log_debug, "completed catch-up journal read", "reads", len(r.readChans), ) @@ -170,9 +170,9 @@ func (r *ring) onRead(staged *pf.ShuffleResponse, ok bool, ex *bindings.Extracto return } - if len(staged.DocsJson) != 0 { + if len(staged.Docs) != 0 { // Extract from staged documents. - for _, d := range staged.DocsJson { + for _, d := range staged.Docs { ex.Document(staged.Arena.Bytes(d)) } var uuids, fields, err = ex.Extract() @@ -194,7 +194,7 @@ func (r *ring) onExtract(staged *pf.ShuffleResponse, uuids []pf.UUIDParts, packe if staged.TerminalError == "" { staged.TerminalError = err.Error() } - r.log(pf.LogLevel_error, + r.log(ops.Log_error, "failed to extract from documents", "error", err, "readThrough", staged.ReadThrough, @@ -213,7 +213,7 @@ func (r *ring) onExtract(staged *pf.ShuffleResponse, uuids []pf.UUIDParts, packe func (r *ring) serve() { pprof.SetGoroutineLabels(r.ctx) - r.log(pf.LogLevel_debug, "started shuffle ring") + r.log(ops.Log_debug, "started shuffle ring") var ( build = r.coordinator.builds.Open(r.shuffle.BuildId) @@ -228,7 +228,7 @@ func (r *ring) serve() { } else if initErr = extractor.Configure( r.shuffle.SourceUuidPtr, r.shuffle.ShuffleKeyPtrs, - json.RawMessage(r.shuffle.ValidateSchemaJson), + json.RawMessage(r.shuffle.ValidateSchema), ); initErr != nil { initErr = fmt.Errorf("building document extractor: %w", initErr) } @@ -270,10 +270,10 @@ loop: sub.callback(nil, r.ctx.Err()) } - r.log(pf.LogLevel_debug, "stopped shuffle ring") + r.log(ops.Log_debug, "stopped shuffle ring") } -func (r *ring) log(lvl pf.LogLevel, message string, fields ...interface{}) { +func (r *ring) log(lvl ops.Log_Level, message string, fields ...interface{}) { if lvl > r.coordinator.publisher.Labels().LogLevel { return } @@ -300,7 +300,7 @@ func (r *ring) readDocuments(ch chan *pf.ShuffleResponse, req pb.ReadRequest) (_ "offset", fmt.Sprint(req.Offset), )), ) - r.log(pf.LogLevel_debug, + r.log(ops.Log_debug, "started reading journal documents", "endOffset", req.EndOffset, "offset", req.Offset, @@ -321,7 +321,7 @@ func (r *ring) readDocuments(ch chan *pf.ShuffleResponse, req pb.ReadRequest) (_ var lastArena, lastDocs = 0, 0 defer func() { - r.log(pf.LogLevel_debug, + r.log(ops.Log_debug, "finished reading journal documents", "endOffset", req.EndOffset, "error", __out, @@ -344,7 +344,7 @@ func (r *ring) readDocuments(ch chan *pf.ShuffleResponse, req pb.ReadRequest) (_ // bufio.Reader generates these when a read is restarted multiple // times with no actual bytes read (e.x. because the journal is idle). // It's safe to ignore. - r.log(pf.LogLevel_debug, + r.log(ops.Log_debug, "multiple journal reads occurred without any progress", "endOffset", req.EndOffset, "offset", offset, @@ -353,7 +353,7 @@ func (r *ring) readDocuments(ch chan *pf.ShuffleResponse, req pb.ReadRequest) (_ line, err = nil, nil case client.ErrOffsetJump: // Offset jumps occur when fragments are removed from the middle of a journal. - r.log(pf.LogLevel_warn, + r.log(ops.Log_warn, "source journal offset jump", "from", offset, "to", rr.AdjustedOffset(br), @@ -365,7 +365,7 @@ func (r *ring) readDocuments(ch chan *pf.ShuffleResponse, req pb.ReadRequest) (_ // Continue reading, now with blocking reads. 
line, err, rr.Reader.Request.Block = nil, nil, true - r.log(pf.LogLevel_debug, + r.log(ops.Log_debug, "switched to blocking journal read", "endOffset", req.EndOffset, "offset", offset, @@ -387,7 +387,7 @@ func (r *ring) readDocuments(ch chan *pf.ShuffleResponse, req pb.ReadRequest) (_ // Would |line| cause a re-allocation of |out| ? if out.Arena == nil || line == nil || - (len(out.Arena)+len(line) <= cap(out.Arena) && len(out.DocsJson)+1 <= cap(out.DocsJson)) { + (len(out.Arena)+len(line) <= cap(out.Arena) && len(out.Docs)+1 <= cap(out.Docs)) { // It wouldn't, as |out| hasn't been allocated in the first place, // or it can be extended without re-allocation. } else { @@ -426,12 +426,12 @@ func (r *ring) readDocuments(ch chan *pf.ShuffleResponse, req pb.ReadRequest) (_ var docsCap = roundUpPow2(lastDocs, docsCapMin, docsCapMax) out.Arena = make([]byte, 0, arenaCap) - out.DocsJson = make([]pf.Slice, 0, docsCap) + out.Docs = make([]pf.Slice, 0, docsCap) out.Offsets = make([]int64, 0, 2*docsCap) } if line != nil { - out.DocsJson = append(out.DocsJson, out.Arena.Add(line)) + out.Docs = append(out.Docs, out.Arena.Add(line)) out.Offsets = append(out.Offsets, offset) offset = rr.AdjustedOffset(br) out.Offsets = append(out.Offsets, offset) @@ -443,7 +443,7 @@ func (r *ring) readDocuments(ch chan *pf.ShuffleResponse, req pb.ReadRequest) (_ out.ReadThrough = offset out.WriteHead = rr.Reader.Response.WriteHead - lastArena, lastDocs = len(out.Arena), len(out.DocsJson) + lastArena, lastDocs = len(out.Arena), len(out.Docs) // Place back onto channel (cannot block). ch <- out diff --git a/go/shuffle/ring_test.go b/go/shuffle/ring_test.go index 6e3aac0996..e57e7356f2 100644 --- a/go/shuffle/ring_test.go +++ b/go/shuffle/ring_test.go @@ -76,8 +76,8 @@ func TestReadingDocuments(t *testing.T) { for out := range ch { require.Equal(t, "", out.TerminalError) - if l := len(out.DocsJson); l > 0 { - require.Equal(t, record, out.Arena.Bytes(out.DocsJson[0]), record) + if l := len(out.Docs); l > 0 { + require.Equal(t, record, out.Arena.Bytes(out.Docs[0]), record) count -= l } // The final ShuffleResponse (only) should have the Tailing bit set. @@ -106,7 +106,7 @@ func TestReadingDocuments(t *testing.T) { var out = <-ch require.Equal(t, "", out.TerminalError) - require.Equal(t, [][]byte{record}, out.Arena.AllBytes(out.DocsJson...)) + require.Equal(t, [][]byte{record}, out.Arena.AllBytes(out.Docs...)) require.Equal(t, []pb.Offset{app.Response.Commit.Begin, app.Response.Commit.End}, out.Offsets) require.Equal(t, app.Response.Commit.End, out.ReadThrough) require.Equal(t, app.Response.Commit.End, out.WriteHead) @@ -122,7 +122,7 @@ func TestReadingDocuments(t *testing.T) { out = <-ch require.Equal(t, "unexpected EOF", out.TerminalError) - require.Equal(t, [][]byte{record[:20]}, out.Arena.AllBytes(out.DocsJson...)) + require.Equal(t, [][]byte{record[:20]}, out.Arena.AllBytes(out.Docs...)) require.Equal(t, []pb.Offset{0, 20}, out.Offsets) require.Equal(t, int64(20), out.ReadThrough) require.Equal(t, app.Response.Commit.End, out.WriteHead) @@ -145,13 +145,13 @@ func TestDocumentExtraction(t *testing.T) { }) var staged pf.ShuffleResponse - staged.DocsJson = staged.Arena.AddAll([]byte("doc-1\n"), []byte("doc-2\n")) + staged.Docs = staged.Arena.AddAll([]byte("doc-1\n"), []byte("doc-2\n")) // Case: extraction fails. 
r.onExtract(&staged, nil, nil, fmt.Errorf("an error")) require.Equal(t, pf.ShuffleResponse{ Arena: pf.Arena([]byte("doc-1\ndoc-2\n")), - DocsJson: []pf.Slice{{Begin: 0, End: 6}, {Begin: 6, End: 12}}, + Docs: []pf.Slice{{Begin: 0, End: 6}, {Begin: 6, End: 12}}, TerminalError: "an error", }, staged) staged.TerminalError = "" // Reset. @@ -168,7 +168,7 @@ func TestDocumentExtraction(t *testing.T) { require.Equal(t, pf.ShuffleResponse{ Arena: pf.Arena([]byte("doc-1\ndoc-2\n\025*\002some-string\000")), - DocsJson: []pf.Slice{{Begin: 0, End: 6}, {Begin: 6, End: 12}}, + Docs: []pf.Slice{{Begin: 0, End: 6}, {Begin: 6, End: 12}}, UuidParts: []pf.UUIDParts{{Clock: 123}, {Clock: 456}}, PackedKey: []pf.Slice{{Begin: 12, End: 14}, {Begin: 14, End: 27}}, }, staged) diff --git a/go/shuffle/subscriber.go b/go/shuffle/subscriber.go index a289a17442..6f944c72f0 100644 --- a/go/shuffle/subscriber.go +++ b/go/shuffle/subscriber.go @@ -33,8 +33,8 @@ func (s *subscriber) stageDoc(response *pf.ShuffleResponse, doc int) { var offset = response.Offsets[2*doc] if offset >= s.Offset && (s.EndOffset == 0 || offset < s.EndOffset) { - s.staged.DocsJson = append(s.staged.DocsJson, - s.staged.Arena.Add(response.Arena.Bytes(response.DocsJson[doc]))) + s.staged.Docs = append(s.staged.Docs, + s.staged.Arena.Add(response.Arena.Bytes(response.Docs[doc]))) s.staged.Offsets = append(s.staged.Offsets, offset, response.Offsets[2*doc+1]) s.staged.UuidParts = append(s.staged.UuidParts, response.UuidParts[doc]) s.staged.PackedKey = append(s.staged.PackedKey, @@ -120,7 +120,7 @@ func (s subscribers) stageResponses(from *pf.ShuffleResponse) { s[i].staged.WriteHead = from.WriteHead } for doc, uuid := range from.UuidParts { - if message.Flags(uuid.ProducerAndFlags) == message.Flag_ACK_TXN { + if message.Flags(uuid.Node) == message.Flag_ACK_TXN { // ACK documents are always broadcast to every subscriber. 
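			// Here `uuid.Node` is the renamed ProducerAndFlags word: it still
			// packs the producer ID in its high bytes and the message flags in
			// its low bits, so the message.Flags() conversion continues to
			// extract flags such as Flag_ACK_TXN and Flag_CONTINUE_TXN.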
for i := range s { s[i].stageDoc(from, doc) @@ -255,7 +255,7 @@ func newStagedResponse(arenaEstimate, docsEstimate int) *pf.ShuffleResponse { return &pf.ShuffleResponse{ Arena: make([]byte, 0, arenaCap), - DocsJson: make([]pf.Slice, 0, docsCap), + Docs: make([]pf.Slice, 0, docsCap), Offsets: make([]int64, 0, 2*docsCap), UuidParts: make([]pf.UUIDParts, 0, docsCap), PackedKey: make([]pf.Slice, 0, docsCap), diff --git a/go/shuffle/subscriber_test.go b/go/shuffle/subscriber_test.go index 222e47c519..66b883e8cc 100644 --- a/go/shuffle/subscriber_test.go +++ b/go/shuffle/subscriber_test.go @@ -25,11 +25,11 @@ func simpleResponseFixture() *pf.ShuffleResponse { WriteHead: 600, Offsets: []pb.Offset{200, 300, 300, 400}, UuidParts: []pf.UUIDParts{ - {Clock: 1001, ProducerAndFlags: uint64(message.Flag_CONTINUE_TXN)}, - {Clock: 1002, ProducerAndFlags: uint64(message.Flag_CONTINUE_TXN)}, + {Clock: 1001, Node: uint64(message.Flag_CONTINUE_TXN)}, + {Clock: 1002, Node: uint64(message.Flag_CONTINUE_TXN)}, }, } - resp.DocsJson = resp.Arena.AddAll([]byte("one"), []byte("two")) + resp.Docs = resp.Arena.AddAll([]byte("one"), []byte("two")) resp.PackedKey = resp.Arena.AddAll([]byte("bb-cc-key"), []byte("more-key")) return resp } @@ -45,11 +45,11 @@ func TestSubscriberDocStaging(t *testing.T) { require.Equal(t, sub.staged.Offsets, []pb.Offset{200, 300, 300, 400}) require.Equal(t, sub.staged.UuidParts, []pf.UUIDParts{ - {Clock: 1001, ProducerAndFlags: uint64(message.Flag_CONTINUE_TXN)}, - {Clock: 1002, ProducerAndFlags: uint64(message.Flag_CONTINUE_TXN)}, + {Clock: 1001, Node: uint64(message.Flag_CONTINUE_TXN)}, + {Clock: 1002, Node: uint64(message.Flag_CONTINUE_TXN)}, }) require.Equal(t, [][]byte{[]byte("one"), []byte("two")}, - sub.staged.Arena.AllBytes(sub.staged.DocsJson...)) + sub.staged.Arena.AllBytes(sub.staged.Docs...)) require.Equal(t, [][]byte{[]byte("bb-cc-key"), []byte("more-key")}, sub.staged.Arena.AllBytes(sub.staged.PackedKey...)) @@ -264,14 +264,14 @@ func TestSubscriberResponseStaging(t *testing.T) { WriteHead: 2000, Offsets: []pb.Offset{200, 201, 300, 301, 400, 401, 500, 501, 600, 601}, UuidParts: []pf.UUIDParts{ - {Clock: 10000 << 4, ProducerAndFlags: uint64(message.Flag_CONTINUE_TXN)}, - {Clock: 10001 << 4, ProducerAndFlags: uint64(message.Flag_CONTINUE_TXN)}, - {Clock: 10002 << 4, ProducerAndFlags: uint64(message.Flag_ACK_TXN)}, - {Clock: 10003 << 4, ProducerAndFlags: uint64(message.Flag_CONTINUE_TXN)}, - {Clock: 10004 << 4, ProducerAndFlags: uint64(message.Flag_CONTINUE_TXN)}, + {Clock: 10000 << 4, Node: uint64(message.Flag_CONTINUE_TXN)}, + {Clock: 10001 << 4, Node: uint64(message.Flag_CONTINUE_TXN)}, + {Clock: 10002 << 4, Node: uint64(message.Flag_ACK_TXN)}, + {Clock: 10003 << 4, Node: uint64(message.Flag_CONTINUE_TXN)}, + {Clock: 10004 << 4, Node: uint64(message.Flag_CONTINUE_TXN)}, }, } - fixture.DocsJson = fixture.Arena.AddAll(tokens...) + fixture.Docs = fixture.Arena.AddAll(tokens...) fixture.PackedKey = fixture.Arena.AddAll(tokens...) 
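	// Note: Arena.AddAll appends each byte slice into the shared Arena and
	// returns one pf.Slice per input, mirroring how Docs and PackedKey
	// entries are staged by subscriber.stageDoc above.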
s.stageResponses(&fixture) diff --git a/go/shuffle/testdata/ab.flow.yaml b/go/shuffle/testdata/ab.flow.yaml index 92b1ecf601..6d6785960c 100644 --- a/go/shuffle/testdata/ab.flow.yaml +++ b/go/shuffle/testdata/ab.flow.yaml @@ -17,15 +17,24 @@ collections: required: [a] key: [/a] - derivation: - typescript: - module: | - import something; - doSomething(); - transform: - fromCollection: - source: { name: a/collection } - publish: { lambda: typescript } + derive: + using: + typescript: + module: | + import { IDerivation, Document, SourceSwizzle } from 'flow/a/derivation.ts'; + + export class Derivation extends IDerivation { + swizzle(source: { doc: SourceSwizzle }): Document[] { + return []; + } + } + transforms: + # Note that our test depends on this transform being validated as `readOnly`. + # TODO(johnny): Switch this to a SQL SELECT that's actually read-only, + # rather than the current manual twiddling. + - name: swizzle + source: + name: a/collection storageMappings: "": { stores: [{ provider: S3, bucket: a-bucket }] } diff --git a/go/testing/driver.go b/go/testing/driver.go index 0676667bba..8558b56857 100644 --- a/go/testing/driver.go +++ b/go/testing/driver.go @@ -14,8 +14,8 @@ import ( "github.com/estuary/flow/go/bindings" "github.com/estuary/flow/go/flow" "github.com/estuary/flow/go/labels" - "github.com/estuary/flow/go/ops" pf "github.com/estuary/flow/go/protocols/flow" + "github.com/estuary/flow/go/protocols/ops" "github.com/nsf/jsondiff" log "github.com/sirupsen/logrus" "go.gazette.dev/core/broker/client" @@ -48,7 +48,7 @@ func NewClusterDriver( ) (*ClusterDriver, error) { var collectionIndex = make(map[pf.Collection]*pf.CollectionSpec, len(collections)) for _, spec := range collections { - collectionIndex[spec.Collection] = spec + collectionIndex[spec.Name] = spec } var driver = &ClusterDriver{ @@ -138,15 +138,15 @@ func (c *ClusterDriver) Stat(ctx context.Context, stat PendingStat) (readThrough // Ingest implements Driver for a Cluster. func (c *ClusterDriver) Ingest(ctx context.Context, test *pf.TestSpec, testStep int) (writeAt *Clock, _ error) { log.WithFields(log.Fields{ - "test": test.Test, + "test": test.Name, "testStep": testStep, }).Debug("starting ingest") var step = test.Steps[testStep] resp, err := c.tc.Ingest(ctx, &pf.IngestRequest{ - Collection: step.Collection, - BuildId: c.buildID, - DocsJsonLines: step.DocsJsonLines, + Collection: step.Collection, + BuildId: c.buildID, + DocsJsonVec: step.DocsJsonVec, }) if err != nil { @@ -157,7 +157,7 @@ func (c *ClusterDriver) Ingest(ctx context.Context, test *pf.TestSpec, testStep writeAt.ReduceMax(resp.JournalEtcd, resp.JournalWriteHeads) log.WithFields(log.Fields{ - "test": test.Test, + "test": test.Name, "testStep": testStep, "writeAt": *writeAt, }).Debug("ingest complete") @@ -180,7 +180,7 @@ func (c *ClusterDriver) Advance(ctx context.Context, delta TestTime) error { // Verify implements Driver for a Cluster. func (c *ClusterDriver) Verify(ctx context.Context, test *pf.TestSpec, testStep int, from, to *Clock) error { log.WithFields(log.Fields{ - "test": test.Test, + "test": test.Name, "testStep": testStep, }).Debug("starting verify") var step = test.Steps[testStep] @@ -198,10 +198,7 @@ func (c *ClusterDriver) Verify(ctx context.Context, test *pf.TestSpec, testStep return err } - var expected = strings.Split(step.DocsJsonLines, "\n") - if len(expected) == 1 && len(expected[0]) == 0 { - expected = nil // Split("") => [][]string{""} ; map to nil. 
- } + var expected = step.DocsJsonVec var diffOptions = jsondiff.DefaultConsoleOptions() // The default behavior of jsondiff is to compare the exact string representations of numbers. @@ -255,7 +252,7 @@ func (c *ClusterDriver) Verify(ctx context.Context, test *pf.TestSpec, testStep } log.WithFields(log.Fields{ - "test": test.Test, + "test": test.Name, "testStep": testStep, }).Debug("verify complete") return nil @@ -376,7 +373,7 @@ func combineDocumentsForVerify( collection *pf.CollectionSpec, documents [][]byte, ) ([]json.RawMessage, error) { - var publisher = ops.NewLocalPublisher(labels.ShardLabeling{}) + var publisher = ops.NewLocalPublisher(ops.ShardLabeling{}) // Feed documents into an extractor, to extract UUIDs. var extractor, err = bindings.NewExtractor(publisher) @@ -397,18 +394,18 @@ func combineDocumentsForVerify( if err != nil { return nil, fmt.Errorf("creating combiner: %w", err) } else if err = combiner.Configure( - collection.Collection.String(), - collection.Collection, + collection.Name.String(), + collection.Name, collection.GetReadSchemaJson(), collection.UuidPtr, - collection.KeyPtrs, + collection.Key, nil, // Don't extract additional fields. ); err != nil { return nil, fmt.Errorf("configuring combiner: %w", err) } for d := range documents { - if uuids[d].ProducerAndFlags&uint64(message.Flag_ACK_TXN) != 0 { + if uuids[d].Node&uint64(message.Flag_ACK_TXN) != 0 { continue } @@ -442,7 +439,7 @@ func Initialize(ctx context.Context, driver *ClusterDriver, graph *Graph) error // List journals of the collection. list, err := client.ListAllJournals(ctx, driver.rjc, flow.ListPartitionsRequest(collection)) if err != nil { - return fmt.Errorf("listing journals of %s: %w", collection.Collection, err) + return fmt.Errorf("listing journals of %s: %w", collection.Name, err) } // Fetch offsets of each journal. @@ -462,7 +459,7 @@ func Initialize(ctx context.Context, driver *ClusterDriver, graph *Graph) error } // Track it as a completed ingestion. - graph.CompletedIngest(collection.Collection, &Clock{Etcd: list.Header.Etcd, Offsets: offsets}) + graph.CompletedIngest(collection.Name, &Clock{Etcd: list.Header.Etcd, Offsets: offsets}) } // Run an empty test to poll all Stats implied by the completed ingests. diff --git a/go/testing/graph.go b/go/testing/graph.go index 9d691fe078..01a4fc4f56 100644 --- a/go/testing/graph.go +++ b/go/testing/graph.go @@ -58,7 +58,7 @@ type taskRead struct { // NewGraph constructs a new *Graph. func NewGraph( captures []*pf.CaptureSpec, - derivations []*pf.DerivationSpec, + collections []*pf.CollectionSpec, materializations []*pf.MaterializationSpec, ) *Graph { var g = &Graph{ @@ -73,8 +73,10 @@ func NewGraph( for _, t := range captures { g.addTask(t) } - for _, t := range derivations { - g.addTask(t) + for _, t := range collections { + if t.Derivation != nil { + g.addTask(t) + } } for _, t := range materializations { g.addTask(t) @@ -94,10 +96,10 @@ func (g *Graph) addTask(t pf.Task) { // Index into |outputs|. if capture, ok := t.(*pf.CaptureSpec); ok { for _, b := range capture.Bindings { - g.outputs[name] = append(g.outputs[name], b.Collection.Collection) + g.outputs[name] = append(g.outputs[name], b.Collection.Name) } - } else if derivation, ok := t.(*pf.DerivationSpec); ok { - g.outputs[name] = append(g.outputs[name], derivation.Collection.Collection) + } else if derivation, ok := t.(*pf.CollectionSpec); ok { + g.outputs[name] = append(g.outputs[name], derivation.Name) } // Index into |readers|. 
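
A note on the Verify change above (DocsJsonLines → DocsJsonVec): the deleted special-case existed because strings.Split never returns an empty slice; splitting an empty string still yields one empty element, which would otherwise read as "expect a single empty document". Passing expected documents as a vector removes the quirk. The behavior in isolation, as a runnable sketch:

    package main

    import (
        "fmt"
        "strings"
    )

    func main() {
        fmt.Printf("%q\n", strings.Split("", "\n"))     // [""] (one empty element, not nil)
        fmt.Printf("%q\n", strings.Split("a\nb", "\n")) // ["a" "b"]
    }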
@@ -200,10 +202,10 @@ func (g *Graph) CompletedIngest(collection pf.Collection, writeClock *Clock) {
 }
 
 // CompletedStat tells the Graph of a completed task stat.
-// * |readClock| is a min-reduced Clock over read progress across derivation shards.
-// It's journals include group-name suffixes (as returned from Gazette's Stat).
-// * |writeClock| is a max-reduced Clock over write progress across derivation shards.
-// It's journals *don't* include group names (again, as returned from Gazette's Stat).
+// - |readClock| is a min-reduced Clock over read progress across derivation shards.
+//   Its journals include group-name suffixes (as returned from Gazette's Stat).
+// - |writeClock| is a max-reduced Clock over write progress across derivation shards.
+//   Its journals *don't* include group names (again, as returned from Gazette's Stat).
 func (g *Graph) CompletedStat(task TaskName, readClock *Clock, writeClock *Clock) {
 	g.writeClock.ReduceMax(writeClock.Etcd, writeClock.Offsets)
 	g.readThrough[task] = readClock // Track progress of this task.
diff --git a/go/testing/graph_test.go b/go/testing/graph_test.go
index 09ea7f577d..a4a69fad80 100644
--- a/go/testing/graph_test.go
+++ b/go/testing/graph_test.go
@@ -12,31 +12,34 @@ import (
 )
 
 func transformFixture(source pf.Collection, transform pf.Transform,
-	derivation pf.Collection, readDelay uint32) pf.TransformSpec {
-
-	return pf.TransformSpec{
-		Derivation: derivation,
-		Transform:  transform,
-		Shuffle: pf.Shuffle{
-			SourceCollection: source,
-			GroupName:        fmt.Sprintf("derive/%s/%s", derivation, transform),
-			ReadDelaySeconds: readDelay,
-		},
+	derivation pf.Collection, readDelay uint32) pf.CollectionSpec_Derivation_Transform {
+
+	return pf.CollectionSpec_Derivation_Transform{
+		Name:              transform,
+		Collection:        pf.CollectionSpec{Name: source},
+		ReadDelaySeconds:  readDelay,
+		JournalReadSuffix: fmt.Sprintf("derive/%s/%s", derivation, transform),
+
+		// This is merely a place to retain `derivation` so we can group these
+		// later, and has no semantic association with an actual shuffle key.
+ ShuffleKey: []string{derivation.String()}, } } -func derivationsFixture(transforms ...pf.TransformSpec) []*pf.DerivationSpec { - var grouped = make(map[pf.Collection][]pf.TransformSpec) +func derivationsFixture(transforms ...pf.CollectionSpec_Derivation_Transform) []*pf.CollectionSpec { + var grouped = make(map[string][]pf.CollectionSpec_Derivation_Transform) for _, t := range transforms { - grouped[t.Derivation] = append(grouped[t.Derivation], t) + grouped[t.ShuffleKey[0]] = append(grouped[t.ShuffleKey[0]], t) } - var out []*pf.DerivationSpec - for _, group := range grouped { - out = append(out, &pf.DerivationSpec{ - Collection: pf.CollectionSpec{Collection: group[0].Derivation}, - Transforms: group, - ShardTemplate: &pc.ShardSpec{Disable: false}, + var out []*pf.CollectionSpec + for name, group := range grouped { + out = append(out, &pf.CollectionSpec{ + Name: pf.Collection(name), + Derivation: &pf.CollectionSpec_Derivation{ + Transforms: group, + ShardTemplate: &pc.ShardSpec{Disable: false}, + }, }) } return out @@ -253,56 +256,48 @@ func TestReadyStats(t *testing.T) { func TestTaskIndexing(t *testing.T) { var captures = []*pf.CaptureSpec{ { - Capture: "a/capture/task", + Name: "a/capture/task", Bindings: []*pf.CaptureSpec_Binding{ - {Collection: pf.CollectionSpec{Collection: "a/capture/one"}}, - {Collection: pf.CollectionSpec{Collection: "a/capture/two"}}, + {Collection: pf.CollectionSpec{Name: "a/capture/one"}}, + {Collection: pf.CollectionSpec{Name: "a/capture/two"}}, }, ShardTemplate: &pc.ShardSpec{Disable: false}, }, } - var derivations = []*pf.DerivationSpec{ + var derivations = []*pf.CollectionSpec{ { - Collection: pf.CollectionSpec{Collection: "a/derivation"}, - Transforms: []pf.TransformSpec{ - { - Shuffle: pf.Shuffle{ - SourceCollection: "a/capture/one", - GroupName: "derive/A", + Name: "a/derivation", + Derivation: &pf.CollectionSpec_Derivation{ + Transforms: []pf.CollectionSpec_Derivation_Transform{ + { + Collection: pf.CollectionSpec{Name: "a/capture/one"}, + JournalReadSuffix: "derive/A", }, - }, - { - Shuffle: pf.Shuffle{ - SourceCollection: "a/capture/one", - GroupName: "derive/AA", - ReadDelaySeconds: 5, + { + Collection: pf.CollectionSpec{Name: "a/capture/one"}, + JournalReadSuffix: "derive/AA", + ReadDelaySeconds: 5, }, - }, - { - Shuffle: pf.Shuffle{ - SourceCollection: "a/capture/two", - GroupName: "derive/B", + { + Collection: pf.CollectionSpec{Name: "a/capture/two"}, + JournalReadSuffix: "derive/B", }, }, + ShardTemplate: &pc.ShardSpec{Disable: false}, }, - ShardTemplate: &pc.ShardSpec{Disable: false}, }, } var materializations = []*pf.MaterializationSpec{ { - Materialization: "a/materialization", + Name: "a/materialization", Bindings: []*pf.MaterializationSpec_Binding{ { - Shuffle: pf.Shuffle{ - SourceCollection: "a/derivation", - GroupName: "mat/1", - }, + Collection: pf.CollectionSpec{Name: "a/derivation"}, + JournalReadSuffix: "mat/1", }, { - Shuffle: pf.Shuffle{ - SourceCollection: "a/capture/two", - GroupName: "mat/2", - }, + Collection: pf.CollectionSpec{Name: "a/capture/two"}, + JournalReadSuffix: "mat/2", }, }, ShardTemplate: &pc.ShardSpec{Disable: false},