@@ -58,6 +58,9 @@ type localRobot struct {
58
58
cloudConnSvc icloud.ConnectionService
59
59
logger logging.Logger
60
60
activeBackgroundWorkers sync.WaitGroup
61
+
62
+ // reconfigurationLock manages access to the resource graph and nodes. If either may change, this lock should be taken.
63
+ reconfigurationLock sync.Mutex
61
64
// reconfigureWorkers tracks goroutines spawned by reconfiguration functions. we only
62
65
// wait on this group in tests to prevent goleak-related failures. however, we do not
63
66
// wait on this group outside of testing, since the related goroutines may be running
@@ -172,7 +175,9 @@ func (r *localRobot) Close(ctx context.Context) error {
172
175
err = multierr .Combine (err , r .cloudConnSvc .Close (ctx ))
173
176
}
174
177
if r .manager != nil {
178
+ r .reconfigurationLock .Lock ()
175
179
err = multierr .Combine (err , r .manager .Close (ctx ))
180
+ r .reconfigurationLock .Unlock ()
176
181
}
177
182
if r .packageManager != nil {
178
183
err = multierr .Combine (err , r .packageManager .Close (ctx ))
@@ -307,6 +312,7 @@ func (r *localRobot) completeConfigWorker() {
307
312
trigger = "remote"
308
313
r .logger .CDebugw (r .closeContext , "configuration attempt triggered by remote" )
309
314
}
315
+ r .reconfigurationLock .Lock ()
310
316
anyChanges := r .manager .updateRemotesResourceNames (r .closeContext )
311
317
if r .manager .anyResourcesNotConfigured () {
312
318
anyChanges = true
@@ -316,6 +322,7 @@ func (r *localRobot) completeConfigWorker() {
316
322
r .updateWeakDependents (r .closeContext )
317
323
r .logger .CDebugw (r .closeContext , "configuration attempt completed with changes" , "trigger" , trigger )
318
324
}
325
+ r .reconfigurationLock .Unlock ()
319
326
}
320
327
}
321
328
@@ -440,6 +447,11 @@ func newWithResources(
440
447
if err != nil {
441
448
return nil , err
442
449
}
450
+
451
+ // now that we're changing the resource graph, take the reconfigurationLock so
452
+ // that other goroutines can't interleave
453
+ r .reconfigurationLock .Lock ()
454
+ defer r .reconfigurationLock .Unlock ()
443
455
if err := r .manager .resources .AddNode (
444
456
web .InternalServiceName ,
445
457
resource .NewConfiguredGraphNode (resource.Config {}, r .webSvc , builtinModel )); err != nil {
@@ -497,7 +509,7 @@ func newWithResources(
497
509
}, r .activeBackgroundWorkers .Done )
498
510
}
499
511
500
- r .Reconfigure (ctx , cfg )
512
+ r .reconfigure (ctx , cfg , false )
501
513
502
514
for name , res := range resources {
503
515
node := resource .NewConfiguredGraphNode (resource.Config {}, res , unknownModel )
@@ -529,6 +541,8 @@ func New(
529
541
func (r * localRobot ) removeOrphanedResources (ctx context.Context ,
530
542
rNames []resource.Name ,
531
543
) {
544
+ r .reconfigurationLock .Lock ()
545
+ defer r .reconfigurationLock .Unlock ()
532
546
r .manager .markResourcesRemoved (rNames , nil )
533
547
if err := r .manager .removeMarkedAndClose (ctx , nil ); err != nil {
534
548
r .logger .CErrorw (ctx , "error removing and closing marked resources" ,
@@ -1096,6 +1110,8 @@ func dialRobotClient(
1096
1110
// a best effort to remove no longer in use parts, but if it fails to do so, they could
1097
1111
// possibly leak resources. The given config may be modified by Reconfigure.
1098
1112
func (r * localRobot ) Reconfigure (ctx context.Context , newConfig * config.Config ) {
// Hold reconfigurationLock for the whole call so that other goroutines cannot
// interleave changes to the resource graph and nodes while this runs.
// sync.Mutex is not reentrant: code that already holds reconfigurationLock
// must call r.reconfigure directly rather than this exported wrapper.
1113
+ r .reconfigurationLock .Lock ()
1114
+ defer r .reconfigurationLock .Unlock ()
1099
1115
// Delegate to the unexported implementation while the lock is held.
// NOTE(review): the meaning of the trailing `false` argument is not visible
// from here — confirm against the definition of reconfigure.
r .reconfigure (ctx , newConfig , false )
1100
1116
}
1101
1117
@@ -1361,6 +1377,8 @@ func (r *localRobot) restartSingleModule(ctx context.Context, mod *config.Module
1361
1377
Modified : & config.ModifiedConfigDiff {},
1362
1378
Removed : & config.Config {},
1363
1379
}
1380
+ r .reconfigurationLock .Lock ()
1381
+ defer r .reconfigurationLock .Unlock ()
1364
1382
// note: if !isRunning (i.e. the module is in config but it crashed), putting it in diff.Modified
1365
1383
// results in a no-op; we use .Added instead.
1366
1384
if isRunning {
0 commit comments