@@ -191,6 +191,16 @@ func RestartNetwork(ctx context.Context, log logging.Logger, dir string) error {
 	return network.Restart(ctx)
 }
 
+// Restart the provided nodes. Blocks on the nodes accepting API requests but not their health.
+func restartNodes(ctx context.Context, nodes ...*Node) error {
+	for _, node := range nodes {
+		if err := node.Restart(ctx); err != nil {
+			return fmt.Errorf("failed to restart node %s: %w", node.NodeID, err)
+		}
+	}
+	return nil
+}
+
 // Reads a network from the provided directory.
 func ReadNetwork(ctx context.Context, log logging.Logger, dir string) (*Network, error) {
 	canonicalDir, err := toCanonicalDir(dir)
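Note: the new restartNodes helper deliberately returns once each node is accepting API requests; it does not wait for health. Callers that need healthy nodes are expected to pair it with the WaitForHealthyNodes helper introduced later in this change. A minimal sketch of that pairing (restartAndWait is a hypothetical name, not part of the diff):

// Hypothetical composition of the two new helpers: restart every node
// first, then block until all of them report healthy.
func restartAndWait(ctx context.Context, log logging.Logger, nodes ...*Node) error {
	if err := restartNodes(ctx, nodes...); err != nil {
		return err
	}
	return WaitForHealthyNodes(ctx, log, nodes...)
}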
@@ -441,26 +451,20 @@ func (n *Network) Bootstrap(ctx context.Context, log logging.Logger) error {
 		bootstrapNode.Flags[config.SybilProtectionEnabledKey] = *existingSybilProtectionValue
 	}
 
+	// Ensure the bootstrap node is restarted to pick up subnet and chain configuration
+	//
+	// TODO(marun) This restart might be unnecessary if:
+	// - sybil protection didn't change
+	// - the node is not a subnet validator
 	log.Info("restarting bootstrap node",
 		zap.Stringer("nodeID", bootstrapNode.NodeID),
 	)
-
-	if len(n.Nodes) == 1 {
-		// Ensure the node is restarted to pick up subnet and chain configuration
-		return n.RestartNode(ctx, bootstrapNode)
+	if err := bootstrapNode.Restart(ctx); err != nil {
+		return err
 	}
 
-	// TODO(marun) This last restart of the bootstrap node might be unnecessary if:
-	// - sybil protection didn't change
-	// - the node is not a subnet validator
-
-	// Ensure the bootstrap node is restarted to pick up configuration changes. Avoid using
-	// RestartNode since the node won't be able to report healthy until other nodes are started.
-	if err := bootstrapNode.Stop(ctx); err != nil {
-		return fmt.Errorf("failed to stop node %s: %w", bootstrapNode.NodeID, err)
-	}
-	if err := n.StartNode(ctx, bootstrapNode); err != nil {
-		return fmt.Errorf("failed to start node %s: %w", bootstrapNode.NodeID, err)
+	if len(n.Nodes) == 1 {
+		return nil
 	}
 
 	log.Info("starting remaining nodes")
@@ -486,31 +490,6 @@ func (n *Network) StartNode(ctx context.Context, node *Node) error {
 	return nil
 }
 
-// Restart a single node.
-func (n *Network) RestartNode(ctx context.Context, node *Node) error {
-	runtimeConfig := node.getRuntimeConfig()
-	if runtimeConfig.Process != nil && runtimeConfig.Process.ReuseDynamicPorts {
-		// Attempt to save the API port currently being used so the
-		// restarted node can reuse it. This may result in the node
-		// failing to start if the operating system allocates the port
-		// to a different process between node stop and start.
-		if err := node.SaveAPIPort(); err != nil {
-			return err
-		}
-	}
-
-	if err := node.Stop(ctx); err != nil {
-		return fmt.Errorf("failed to stop node %s: %w", node.NodeID, err)
-	}
-	if err := n.StartNode(ctx, node); err != nil {
-		return fmt.Errorf("failed to start node %s: %w", node.NodeID, err)
-	}
-	n.log.Info("waiting for node to report healthy",
-		zap.Stringer("nodeID", node.NodeID),
-	)
-	return node.WaitForHealthy(ctx)
-}
-
 // Stops all nodes in the network.
 func (n *Network) Stop(ctx context.Context) error {
 	// Ensure the node state is up-to-date
@@ -540,11 +519,22 @@ func (n *Network) Stop(ctx context.Context) error {
 	return nil
 }
 
-// Restarts all nodes in the network.
+// Restarts all non-ephemeral nodes in the network.
 func (n *Network) Restart(ctx context.Context) error {
 	n.log.Info("restarting network")
-	for _, node := range n.Nodes {
-		if err := n.RestartNode(ctx, node); err != nil {
+	if err := restartNodes(ctx, n.Nodes...); err != nil {
+		return err
+	}
+	return WaitForHealthyNodes(ctx, n.log, n.Nodes...)
+}
+
+// Waits for the provided nodes to become healthy.
+func WaitForHealthyNodes(ctx context.Context, log logging.Logger, nodes ...*Node) error {
+	for _, node := range nodes {
+		log.Info("waiting for node to become healthy",
+			zap.Stringer("nodeID", node.NodeID),
+		)
+		if err := node.WaitForHealthy(ctx); err != nil {
 			return err
 		}
 	}
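The reworked Network.Restart restarts every node before waiting on any health check, since a restarted node may not be able to report healthy until its peers are also back up (the rationale given in the comment removed from Bootstrap above). A rough usage sketch, assuming a network previously written to some directory dir:

// Sketch only: reload a network from disk and restart it. ReadNetwork and
// Restart are the functions shown in this diff; dir is a placeholder path.
network, err := ReadNetwork(ctx, log, dir)
if err != nil {
	return err
}
// Stops and starts all non-ephemeral nodes, then waits for each to report healthy.
if err := network.Restart(ctx); err != nil {
	return err
}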
@@ -669,15 +659,20 @@ func (n *Network) CreateSubnets(ctx context.Context, log logging.Logger, apiURI
 	if restartRequired {
 		log.Info("restarting node(s) to enable them to track the new subnet(s)")
 
+		runningNodes := make([]*Node, 0, len(reconfiguredNodes))
 		for _, node := range reconfiguredNodes {
-			if len(node.URI) == 0 {
-				// Only running nodes should be restarted
-				continue
-			}
-			if err := n.RestartNode(ctx, node); err != nil {
-				return err
+			if len(node.URI) > 0 {
+				runningNodes = append(runningNodes, node)
 			}
 		}
+
+		if err := restartNodes(ctx, runningNodes...); err != nil {
+			return err
+		}
+
+		if err := WaitForHealthyNodes(ctx, n.log, runningNodes...); err != nil {
+			return err
+		}
 	}
 
 	// Add validators for the subnet
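Here the restart-then-wait pattern is applied only to nodes that are actually running (non-empty URI). For illustration, the filtering and the two helper calls could be folded into a single hypothetical helper (restartRunningNodes is not part of this change):

// Hypothetical helper: restart only the nodes that are currently running,
// then wait for them to report healthy.
func restartRunningNodes(ctx context.Context, log logging.Logger, candidates ...*Node) error {
	running := make([]*Node, 0, len(candidates))
	for _, node := range candidates {
		if len(node.URI) > 0 {
			running = append(running, node)
		}
	}
	if err := restartNodes(ctx, running...); err != nil {
		return err
	}
	return WaitForHealthyNodes(ctx, log, running...)
}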
@@ -738,15 +733,21 @@ func (n *Network) CreateSubnets(ctx context.Context, log logging.Logger, apiURI
 	log.Info("restarting node(s) to pick up chain configuration")
 
 	// Restart nodes to allow configuration for the new chains to take effect
+	nodesToRestart := make([]*Node, 0, len(n.Nodes))
 	for _, node := range n.Nodes {
-		if !validatorsToRestart.Contains(node.NodeID) {
-			continue
-		}
-		if err := n.RestartNode(ctx, node); err != nil {
-			return err
+		if validatorsToRestart.Contains(node.NodeID) {
+			nodesToRestart = append(nodesToRestart, node)
 		}
 	}
 
+	if err := restartNodes(ctx, nodesToRestart...); err != nil {
+		return err
+	}
+
+	if err := WaitForHealthyNodes(ctx, log, nodesToRestart...); err != nil {
+		return err
+	}
+
 	return nil
 }