From d499bc3542dc0d3c9a51dfeb9037d343a93ccd2e Mon Sep 17 00:00:00 2001 From: Rohit Nayak Date: Thu, 5 Dec 2024 11:58:21 +0100 Subject: [PATCH 1/3] Use fresh context while canceling a migration Signed-off-by: Rohit Nayak --- go/vt/vtctl/workflow/traffic_switcher.go | 26 +++++++++++++++++++----- 1 file changed, 21 insertions(+), 5 deletions(-) diff --git a/go/vt/vtctl/workflow/traffic_switcher.go b/go/vt/vtctl/workflow/traffic_switcher.go index 937dffe70b3..09f33624b59 100644 --- a/go/vt/vtctl/workflow/traffic_switcher.go +++ b/go/vt/vtctl/workflow/traffic_switcher.go @@ -1135,30 +1135,46 @@ func (ts *trafficSwitcher) switchDeniedTables(ctx context.Context) error { return nil } +// cancelMigration attempts to revert all changes made during the migration so that we can get back to the +// state when traffic switching (or reversing) was initiated. func (ts *trafficSwitcher) cancelMigration(ctx context.Context, sm *StreamMigrator) { var err error + + if ctx.Err() != nil { + // We are creating a new context for cancelMigration, but we still record any error, + // for any forensics in case of failures, to help determine whether the migration is being + // cancelled due to a client timeout or some other reason. + ts.Logger().Infof("In Cancel migration: original context invalid: %s", ctx.Err()) + } + + // We create a new context while canceling the migration, so that we are independent of the original + // context being cancelled prior to or during the cancel operation. + cmTimeout := 60 * time.Second + cmCtx, cmCancel := context.WithTimeout(context.Background(), cmTimeout) + defer cmCancel() + if ts.MigrationType() == binlogdatapb.MigrationType_TABLES { - err = ts.switchDeniedTables(ctx) + err = ts.switchDeniedTables(cmCtx) } else { - err = ts.changeShardsAccess(ctx, ts.SourceKeyspaceName(), ts.SourceShards(), allowWrites) + err = ts.changeShardsAccess(cmCtx, ts.SourceKeyspaceName(), ts.SourceShards(), allowWrites) } if err != nil { ts.Logger().Errorf("Cancel migration failed: %v", err) } - sm.CancelStreamMigrations(ctx) + sm.CancelStreamMigrations(cmCtx) err = ts.ForAllTargets(func(target *MigrationTarget) error { query := fmt.Sprintf("update _vt.vreplication set state='Running', message='' where db_name=%s and workflow=%s", encodeString(target.GetPrimary().DbName()), encodeString(ts.WorkflowName())) - _, err := ts.TabletManagerClient().VReplicationExec(ctx, target.GetPrimary().Tablet, query) + _, err := ts.TabletManagerClient().VReplicationExec(cmCtx, target.GetPrimary().Tablet, query) return err }) if err != nil { ts.Logger().Errorf("Cancel migration failed: could not restart vreplication: %v", err) } - err = ts.deleteReverseVReplication(ctx) + err = ts.deleteReverseVReplication(cmCtx) if err != nil { ts.Logger().Errorf("Cancel migration failed: could not delete reverse vreplication streams: %v", err) } From f54bf63b778c075c27d753b57aae0630face3c27 Mon Sep 17 00:00:00 2001 From: Rohit Nayak Date: Mon, 9 Dec 2024 08:53:39 +0100 Subject: [PATCH 2/3] Simplify comment Signed-off-by: Rohit Nayak --- go/vt/vtctl/workflow/traffic_switcher.go | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/go/vt/vtctl/workflow/traffic_switcher.go b/go/vt/vtctl/workflow/traffic_switcher.go index 09f33624b59..a060d0337b3 100644 --- a/go/vt/vtctl/workflow/traffic_switcher.go +++ b/go/vt/vtctl/workflow/traffic_switcher.go @@ -1141,9 +1141,8 @@ func (ts *trafficSwitcher) cancelMigration(ctx context.Context, sm *StreamMigrat var err error if ctx.Err() != nil { - // We are creating a new context for cancelMigration, but we still record any error, - // for any forensics in case of failures, to help determine whether the migration is being - // cancelled due to a client timeout or some other reason. + // Even though we create a new context later on we still record any context error: + // for forensics in case of failures. ts.Logger().Infof("In Cancel migration: original context invalid: %s", ctx.Err()) } From 1ab5b140f1a16935478435f3ac7b608031e1e78a Mon Sep 17 00:00:00 2001 From: Rohit Nayak Date: Tue, 10 Dec 2024 18:08:34 +0100 Subject: [PATCH 3/3] Address review comments Signed-off-by: Rohit Nayak --- go/vt/vtctl/workflow/traffic_switcher.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/go/vt/vtctl/workflow/traffic_switcher.go b/go/vt/vtctl/workflow/traffic_switcher.go index a060d0337b3..4fc34992b0f 100644 --- a/go/vt/vtctl/workflow/traffic_switcher.go +++ b/go/vt/vtctl/workflow/traffic_switcher.go @@ -1158,7 +1158,7 @@ func (ts *trafficSwitcher) cancelMigration(ctx context.Context, sm *StreamMigrat err = ts.changeShardsAccess(cmCtx, ts.SourceKeyspaceName(), ts.SourceShards(), allowWrites) } if err != nil { - ts.Logger().Errorf("Cancel migration failed: %v", err) + ts.Logger().Errorf("Cancel migration failed: could not revert denied tables / shard access: %v", err) } sm.CancelStreamMigrations(cmCtx)