Skip to content

Commit cf9bf57

Browse files
craig[bot]miraradeva
andcommitted
139148: kv/kvnemesis: switch between lease types r=miraradeva a=miraradeva Fixes #125260. The main commit from #135044. This commit adds a new `ChangeSettingOperation` operation class to kvnemesis. It then adds the first variant of the operation, `SetLeaseType`. This operation changes the default range lease type to either expiration, epoch, or leader. This allows us to exercise lease type changes in kvnemesis. For #133891, we'll want to add a DRT operator which does something similar. Release note: None 139888: roachtest: deflake follower-reads/mixed-version/survival=region/locality=global/reads=strong r=miraradeva a=miraradeva Previously, this test failed due to replicas from the same region being considered unhealthy and re-ordered after replicas from other regions (in `transport.splitHealthy()`), resulting in elevated latency of the (cross-region) follower read. The issue is not present in non-mixed-version tests. Unlike the regular tests, the mixed-version tests invoke the test initialization, which includes ensuring upreplication and correct replica placement, at node startup but not before each follower-reads run. So, a node may consider a recently-restarted node unhealthy, and only the latter would have run the initialization steps on startup. This commit factors out the logic that ensures upreplication and correct replica placement, and invokes this logic not just on node startup but before each follower-reads run. Fixes: #139335 Fixes: #138076 Fixes: #136099 Fixes: #133520 Release note: None Co-authored-by: Mira Radeva <[email protected]>
3 parents 35a415a + 0910606 + 3b82faf commit cf9bf57

File tree

9 files changed

+152
-35
lines changed

9 files changed

+152
-35
lines changed

pkg/cmd/roachtest/tests/follower_reads.go

Lines changed: 31 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -532,6 +532,36 @@ func initFollowerReadsDB(
532532
require.NoError(t, err)
533533
}
534534

535+
ensureUpreplicationAndPlacement(ctx, t, l, topology, db)
536+
537+
const rows = 100
538+
const concurrency = 32
539+
sem := make(chan struct{}, concurrency)
540+
data = make(map[int]int64)
541+
insert := func(k int) task.Func {
542+
v := rng.Int63()
543+
data[k] = v
544+
return func(ctx context.Context, _ *logger.Logger) error {
545+
sem <- struct{}{}
546+
defer func() { <-sem }()
547+
_, err := db.ExecContext(ctx, "INSERT INTO mr_db.test VALUES ( $1, $2 )", k, v)
548+
return errors.Wrap(err, "failed to insert data")
549+
}
550+
}
551+
552+
// Insert the data.
553+
g := t.NewGroup(task.WithContext(ctx))
554+
for i := 0; i < rows; i++ {
555+
g.Go(insert(i))
556+
}
557+
g.Wait()
558+
559+
return data
560+
}
561+
562+
func ensureUpreplicationAndPlacement(
563+
ctx context.Context, t test.Test, l *logger.Logger, topology topologySpec, db *gosql.DB,
564+
) {
535565
// Wait until the table has completed up-replication.
536566
l.Printf("waiting for up-replication...")
537567
retryOpts := retry.Options{MaxBackoff: 15 * time.Second}
@@ -654,30 +684,6 @@ func initFollowerReadsDB(
654684
}
655685
}
656686
}
657-
658-
const rows = 100
659-
const concurrency = 32
660-
sem := make(chan struct{}, concurrency)
661-
data = make(map[int]int64)
662-
insert := func(k int) task.Func {
663-
v := rng.Int63()
664-
data[k] = v
665-
return func(ctx context.Context, _ *logger.Logger) error {
666-
sem <- struct{}{}
667-
defer func() { <-sem }()
668-
_, err := db.ExecContext(ctx, "INSERT INTO mr_db.test VALUES ( $1, $2 )", k, v)
669-
return errors.Wrap(err, "failed to insert data")
670-
}
671-
}
672-
673-
// Insert the data.
674-
g := t.NewGroup(task.WithContext(ctx))
675-
for i := 0; i < rows; i++ {
676-
g.Go(insert(i))
677-
}
678-
g.Wait()
679-
680-
return data
681687
}
682688

683689
func computeFollowerReadDuration(ctx context.Context, db *gosql.DB) (time.Duration, error) {
@@ -1052,6 +1058,7 @@ func runFollowerReadsMixedVersionTest(
10521058
}
10531059

10541060
runFollowerReads := func(ctx context.Context, l *logger.Logger, r *rand.Rand, h *mixedversion.Helper) error {
1061+
ensureUpreplicationAndPlacement(ctx, t, l, topology, h.Connect(1))
10551062
runFollowerReadsTest(ctx, t, l, c, r, topology, rc, data)
10561063
return nil
10571064
}

pkg/kv/kvnemesis/applier.go

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -129,6 +129,9 @@ func applyOp(ctx context.Context, env *Env, db *kv.DB, op *Operation) {
129129
case *TransferLeaseOperation:
130130
err := db.AdminTransferLease(ctx, o.Key, o.Target)
131131
o.Result = resultInit(ctx, err)
132+
case *ChangeSettingOperation:
133+
err := changeClusterSettingInEnv(ctx, env, o)
134+
o.Result = resultInit(ctx, err)
132135
case *ChangeZoneOperation:
133136
err := updateZoneConfigInEnv(ctx, env, o.Type)
134137
o.Result = resultInit(ctx, err)
@@ -683,6 +686,39 @@ func newGetReplicasFn(dbs ...*kv.DB) GetReplicasFn {
683686
}
684687
}
685688

689+
func changeClusterSettingInEnv(ctx context.Context, env *Env, op *ChangeSettingOperation) error {
690+
var settings map[string]string
691+
switch op.Type {
692+
case ChangeSettingType_SetLeaseType:
693+
switch op.LeaseType {
694+
case roachpb.LeaseExpiration:
695+
settings = map[string]string{
696+
"kv.lease.expiration_leases_only.enabled": "true",
697+
}
698+
case roachpb.LeaseEpoch:
699+
settings = map[string]string{
700+
"kv.lease.expiration_leases_only.enabled": "false",
701+
"kv.raft.leader_fortification.fraction_enabled": "0.0",
702+
}
703+
case roachpb.LeaseLeader:
704+
settings = map[string]string{
705+
"kv.lease.expiration_leases_only.enabled": "false",
706+
"kv.raft.leader_fortification.fraction_enabled": "1.0",
707+
}
708+
default:
709+
panic(errors.AssertionFailedf(`unknown LeaseType: %v`, op.LeaseType))
710+
}
711+
default:
712+
panic(errors.AssertionFailedf(`unknown ChangeSettingType: %v`, op.Type))
713+
}
714+
for name, val := range settings {
715+
if err := env.SetClusterSetting(ctx, name, val); err != nil {
716+
return err
717+
}
718+
}
719+
return nil
720+
}
721+
686722
func updateZoneConfig(zone *zonepb.ZoneConfig, change ChangeZoneType) {
687723
switch change {
688724
case ChangeZoneType_ToggleGlobalReads:

pkg/kv/kvnemesis/env.go

Lines changed: 16 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -98,15 +98,27 @@ func (e *Env) CheckConsistency(ctx context.Context, span roachpb.Span) []error {
9898
// SetClosedTimestampInterval sets the kv.closed_timestamp.target_duration
9999
// cluster setting to the provided duration.
100100
func (e *Env) SetClosedTimestampInterval(ctx context.Context, d time.Duration) error {
101-
q := fmt.Sprintf(`SET CLUSTER SETTING kv.closed_timestamp.target_duration = '%s'`, d)
102-
_, err := e.anyNode().ExecContext(ctx, q)
103-
return err
101+
return e.SetClusterSetting(ctx, "kv.closed_timestamp.target_duration", d.String())
104102
}
105103

106104
// ResetClosedTimestampInterval resets the kv.closed_timestamp.target_duration
107105
// cluster setting to its default value.
108106
func (e *Env) ResetClosedTimestampInterval(ctx context.Context) error {
109-
const q = `SET CLUSTER SETTING kv.closed_timestamp.target_duration TO DEFAULT`
107+
return e.SetClusterSettingToDefault(ctx, "kv.closed_timestamp.target_duration")
108+
}
109+
110+
// SetClusterSetting sets the cluster setting with the provided name to the
111+
// provided value.
112+
func (e *Env) SetClusterSetting(ctx context.Context, name, val string) error {
113+
q := fmt.Sprintf(`SET CLUSTER SETTING %s = '%s'`, name, val)
114+
_, err := e.anyNode().ExecContext(ctx, q)
115+
return err
116+
}
117+
118+
// SetClusterSettingToDefault resets the cluster setting with the provided name
119+
// to its default value.
120+
func (e *Env) SetClusterSettingToDefault(ctx context.Context, name string) error {
121+
q := fmt.Sprintf(`SET CLUSTER SETTING %s TO DEFAULT`, name)
110122
_, err := e.anyNode().ExecContext(ctx, q)
111123
return err
112124
}

pkg/kv/kvnemesis/generator.go

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,7 @@ type OperationConfig struct {
5151
Merge MergeConfig
5252
ChangeReplicas ChangeReplicasConfig
5353
ChangeLease ChangeLeaseConfig
54+
ChangeSetting ChangeSettingConfig
5455
ChangeZone ChangeZoneConfig
5556
}
5657

@@ -324,6 +325,13 @@ type ChangeLeaseConfig struct {
324325
TransferLease int
325326
}
326327

328+
// ChangeSettingConfig configures the relative probability of generating a
329+
// cluster setting change operation.
330+
type ChangeSettingConfig struct {
331+
// SetLeaseType changes the default range lease type.
332+
SetLeaseType int
333+
}
334+
327335
// ChangeZoneConfig configures the relative probability of generating a zone
328336
// configuration change operation.
329337
type ChangeZoneConfig struct {
@@ -443,6 +451,9 @@ func newAllOperationsConfig() GeneratorConfig {
443451
ChangeLease: ChangeLeaseConfig{
444452
TransferLease: 1,
445453
},
454+
ChangeSetting: ChangeSettingConfig{
455+
SetLeaseType: 1,
456+
},
446457
ChangeZone: ChangeZoneConfig{
447458
ToggleGlobalReads: 1,
448459
},
@@ -683,6 +694,7 @@ func (g *generator) RandStep(rng *rand.Rand) Step {
683694
transferLeaseFn := makeTransferLeaseFn(key, append(voters, nonVoters...))
684695
addOpGen(&allowed, transferLeaseFn, g.Config.Ops.ChangeLease.TransferLease)
685696

697+
addOpGen(&allowed, setLeaseType, g.Config.Ops.ChangeSetting.SetLeaseType)
686698
addOpGen(&allowed, toggleGlobalReads, g.Config.Ops.ChangeZone.ToggleGlobalReads)
687699

688700
return step(g.selectOp(rng, allowed))
@@ -1452,6 +1464,14 @@ func makeTransferLeaseFn(key string, current []roachpb.ReplicationTarget) opGenF
14521464
}
14531465
}
14541466

1467+
func setLeaseType(_ *generator, rng *rand.Rand) Operation {
1468+
leaseTypes := roachpb.TestingAllLeaseTypes()
1469+
leaseType := leaseTypes[rng.Intn(len(leaseTypes))]
1470+
op := changeSetting(ChangeSettingType_SetLeaseType)
1471+
op.ChangeSetting.LeaseType = leaseType
1472+
return op
1473+
}
1474+
14551475
func toggleGlobalReads(_ *generator, _ *rand.Rand) Operation {
14561476
return changeZone(ChangeZoneType_ToggleGlobalReads)
14571477
}
@@ -1928,6 +1948,10 @@ func transferLease(key string, target roachpb.StoreID) Operation {
19281948
return Operation{TransferLease: &TransferLeaseOperation{Key: []byte(key), Target: target}}
19291949
}
19301950

1951+
func changeSetting(changeType ChangeSettingType) Operation {
1952+
return Operation{ChangeSetting: &ChangeSettingOperation{Type: changeType}}
1953+
}
1954+
19311955
func changeZone(changeType ChangeZoneType) Operation {
19321956
return Operation{ChangeZone: &ChangeZoneOperation{Type: changeType}}
19331957
}

pkg/kv/kvnemesis/generator_test.go

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -370,6 +370,11 @@ func TestRandStep(t *testing.T) {
370370
}
371371
case *TransferLeaseOperation:
372372
counts.ChangeLease.TransferLease++
373+
case *ChangeSettingOperation:
374+
switch o.Type {
375+
case ChangeSettingType_SetLeaseType:
376+
counts.ChangeSetting.SetLeaseType++
377+
}
373378
case *ChangeZoneOperation:
374379
switch o.Type {
375380
case ChangeZoneType_ToggleGlobalReads:

pkg/kv/kvnemesis/operations.go

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,8 @@ func (op Operation) Result() *Result {
4646
return &o.Result
4747
case *TransferLeaseOperation:
4848
return &o.Result
49+
case *ChangeSettingOperation:
50+
return &o.Result
4951
case *ChangeZoneOperation:
5052
return &o.Result
5153
case *BatchOperation:
@@ -144,6 +146,8 @@ func (op Operation) format(w *strings.Builder, fctx formatCtx) {
144146
o.format(w, fctx)
145147
case *TransferLeaseOperation:
146148
o.format(w, fctx)
149+
case *ChangeSettingOperation:
150+
o.format(w, fctx)
147151
case *ChangeZoneOperation:
148152
o.format(w, fctx)
149153
case *BatchOperation:
@@ -393,6 +397,16 @@ func (op TransferLeaseOperation) format(w *strings.Builder, fctx formatCtx) {
393397
op.Result.format(w)
394398
}
395399

400+
func (op ChangeSettingOperation) format(w *strings.Builder, fctx formatCtx) {
401+
switch op.Type {
402+
case ChangeSettingType_SetLeaseType:
403+
fmt.Fprintf(w, `env.SetClusterSetting(ctx, %s, %s)`, op.Type, op.LeaseType)
404+
default:
405+
panic(errors.AssertionFailedf(`unknown ChangeSettingType: %v`, op.Type))
406+
}
407+
op.Result.format(w)
408+
}
409+
396410
func (op ChangeZoneOperation) format(w *strings.Builder, fctx formatCtx) {
397411
fmt.Fprintf(w, `env.UpdateZoneConfig(ctx, %s)`, op.Type)
398412
op.Result.format(w)

pkg/kv/kvnemesis/operations.proto

Lines changed: 17 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -119,6 +119,16 @@ message TransferLeaseOperation {
119119
Result result = 3 [(gogoproto.nullable) = false];
120120
}
121121

122+
enum ChangeSettingType {
123+
SetLeaseType = 0;
124+
}
125+
126+
message ChangeSettingOperation {
127+
ChangeSettingType type = 1;
128+
int32 lease_type = 2 [(gogoproto.casttype) = "github.com/cockroachdb/cockroach/pkg/roachpb.LeaseType"];
129+
Result result = 3 [(gogoproto.nullable) = false];
130+
}
131+
122132
enum ChangeZoneType {
123133
ToggleGlobalReads = 0;
124134
}
@@ -165,12 +175,13 @@ message Operation {
165175
MergeOperation merge = 14;
166176
ChangeReplicasOperation change_replicas = 15;
167177
TransferLeaseOperation transfer_lease = 16;
168-
ChangeZoneOperation change_zone = 17;
169-
AddSSTableOperation add_sstable = 18 [(gogoproto.customname) = "AddSSTable"];
170-
SavepointCreateOperation savepoint_create = 19;
171-
SavepointReleaseOperation savepoint_release = 20;
172-
SavepointRollbackOperation savepoint_rollback = 21;
173-
BarrierOperation barrier = 22;
178+
ChangeSettingOperation change_setting = 17;
179+
ChangeZoneOperation change_zone = 18;
180+
AddSSTableOperation add_sstable = 19 [(gogoproto.customname) = "AddSSTable"];
181+
SavepointCreateOperation savepoint_create = 20;
182+
SavepointReleaseOperation savepoint_release = 21;
183+
SavepointRollbackOperation savepoint_rollback = 22;
184+
BarrierOperation barrier = 23;
174185
}
175186

176187
enum ResultType {

pkg/kv/kvnemesis/validator.go

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -888,6 +888,14 @@ func (v *validator) processOp(op Operation) {
888888
if !transferLeaseResultIsIgnorable(t.Result) {
889889
v.failIfError(op, t.Result) // fail on all other errors
890890
}
891+
case *ChangeSettingOperation:
892+
execTimestampStrictlyOptional = true
893+
// It's possible that reading the modified setting times out. Ignore these
894+
// errors for now, at least until we do some validation that depends on the
895+
// cluster settings being fully propagated.
896+
if !resultIsErrorStr(t.Result, `setting updated but timed out waiting to read new value`) {
897+
v.failIfError(op, t.Result)
898+
}
891899
case *ChangeZoneOperation:
892900
execTimestampStrictlyOptional = true
893901
v.failIfError(op, t.Result) // fail on all errors

pkg/roachpb/data.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1976,7 +1976,7 @@ func (l Lease) OwnedBy(storeID StoreID) bool {
19761976
// LeaseType describes the type of lease.
19771977
//
19781978
//go:generate stringer -type=LeaseType
1979-
type LeaseType int
1979+
type LeaseType int32
19801980

19811981
const (
19821982
// LeaseNone specifies no lease, to be used as a default value.

0 commit comments

Comments
 (0)