Skip to content

[tmpnet] Avoid serializing the node data directory #3881

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Apr 17, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion tests/antithesis/init_db.go
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ func initBootstrapDB(network *tmpnet.Network, destPath string) error {
}

// Copy the db state from the bootstrap node to the compose volume path.
sourcePath := filepath.Join(network.Nodes[0].GetDataDir(), "db")
sourcePath := filepath.Join(network.Nodes[0].DataDir, "db")
if err := os.MkdirAll(destPath, perms.ReadWriteExecute); err != nil {
return fmt.Errorf("failed to create db path %q: %w", destPath, err)
}
Expand Down
27 changes: 15 additions & 12 deletions tests/e2e/faultinjection/duplicate_node_id.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@ package faultinjection

import (
"context"
"fmt"

"github.com/onsi/ginkgo/v2"
"github.com/stretchr/testify/require"
Expand All @@ -27,23 +26,22 @@ var _ = ginkgo.Describe("Duplicate node handling", func() {
network := e2e.GetEnv(tc).GetNetwork()

tc.By("creating new node")
node1 := e2e.AddEphemeralNode(tc, network, tmpnet.FlagsMap{})
node1 := e2e.AddEphemeralNode(tc, network, tmpnet.NewEphemeralNode(tmpnet.FlagsMap{}))
e2e.WaitForHealthy(tc, node1)

tc.By("checking that the new node is connected to its peers")
checkConnectedPeers(tc, network.Nodes, node1)

tc.By("creating a second new node with the same staking keypair as the first new node")
node1Flags := node1.Flags
node2Flags := tmpnet.FlagsMap{
config.StakingTLSKeyContentKey: node1Flags[config.StakingTLSKeyContentKey],
config.StakingCertContentKey: node1Flags[config.StakingCertContentKey],
// Construct a unique data dir to ensure the two nodes' data will be stored
// separately. Usually the dir name is the node ID but in this one case the nodes have
// the same node ID.
config.DataDirKey: fmt.Sprintf("%s-second", node1Flags[config.DataDirKey]),
}
node2 := e2e.AddEphemeralNode(tc, network, node2Flags)
node2 := tmpnet.NewEphemeralNode(tmpnet.FlagsMap{
config.StakingTLSKeyContentKey: node1.Flags[config.StakingTLSKeyContentKey],
config.StakingCertContentKey: node1.Flags[config.StakingCertContentKey],
})
// Construct a unique data dir to ensure the two nodes' data will be stored
// separately. Usually the dir name is the node ID but in this one case the nodes have
// the same node ID.
node2.DataDir = node1.DataDir + "-second"
_ = e2e.AddEphemeralNode(tc, network, node2)

tc.By("checking that the second new node fails to become healthy before timeout")
err := tmpnet.WaitForHealthy(tc.DefaultContext(), node2)
Expand Down Expand Up @@ -77,6 +75,11 @@ func checkConnectedPeers(tc tests.TestContext, existingNodes []*tmpnet.Node, new
}

for _, existingNode := range existingNodes {
if existingNode.IsEphemeral {
// Ephemeral nodes may not be running
continue
}

// Check that the existing node is a peer of the new node
require.True(peerIDs.Contains(existingNode.NodeID))

Expand Down
2 changes: 1 addition & 1 deletion tests/e2e/p/interchain_workflow.go
Original file line number Diff line number Diff line change
Expand Up @@ -108,7 +108,7 @@ var _ = e2e.DescribePChain("[Interchain Workflow]", ginkgo.Label(e2e.UsesCChainL
})

tc.By("adding new node and waiting for it to report healthy")
node := e2e.AddEphemeralNode(tc, network, tmpnet.FlagsMap{})
node := e2e.AddEphemeralNode(tc, network, tmpnet.NewEphemeralNode(tmpnet.FlagsMap{}))
e2e.WaitForHealthy(tc, node)

tc.By("retrieving new node's id and pop")
Expand Down
8 changes: 4 additions & 4 deletions tests/e2e/p/l1.go
Original file line number Diff line number Diff line change
Expand Up @@ -172,9 +172,9 @@ var _ = e2e.DescribePChain("[L1]", func() {
})

tc.By("creating the genesis validator")
subnetGenesisNode := e2e.AddEphemeralNode(tc, env.GetNetwork(), tmpnet.FlagsMap{
subnetGenesisNode := e2e.AddEphemeralNode(tc, env.GetNetwork(), tmpnet.NewEphemeralNode(tmpnet.FlagsMap{
config.TrackSubnetsKey: subnetID.String(),
})
}))

genesisNodePoP, err := subnetGenesisNode.GetProofOfPossession()
require.NoError(err)
Expand Down Expand Up @@ -348,9 +348,9 @@ var _ = e2e.DescribePChain("[L1]", func() {
tc.By("advancing the proposervm P-chain height", advanceProposerVMPChainHeight)

tc.By("creating the validator to register")
subnetRegisterNode := e2e.AddEphemeralNode(tc, env.GetNetwork(), tmpnet.FlagsMap{
subnetRegisterNode := e2e.AddEphemeralNode(tc, env.GetNetwork(), tmpnet.NewEphemeralNode(tmpnet.FlagsMap{
config.TrackSubnetsKey: subnetID.String(),
})
}))

registerNodePoP, err := subnetRegisterNode.GetProofOfPossession()
require.NoError(err)
Expand Down
4 changes: 2 additions & 2 deletions tests/e2e/p/staking_rewards.go
Original file line number Diff line number Diff line change
Expand Up @@ -55,9 +55,9 @@ var _ = ginkgo.Describe("[Staking Rewards]", func() {
})

tc.By("adding alpha node, whose uptime should result in a staking reward")
alphaNode := e2e.AddEphemeralNode(tc, network, tmpnet.FlagsMap{})
alphaNode := e2e.AddEphemeralNode(tc, network, tmpnet.NewEphemeralNode(tmpnet.FlagsMap{}))
tc.By("adding beta node, whose uptime should not result in a staking reward")
betaNode := e2e.AddEphemeralNode(tc, network, tmpnet.FlagsMap{})
betaNode := e2e.AddEphemeralNode(tc, network, tmpnet.NewEphemeralNode(tmpnet.FlagsMap{}))

// Wait to check health until both nodes have started to minimize the duration
// required for both nodes to report healthy.
Expand Down
3 changes: 1 addition & 2 deletions tests/fixture/e2e/helpers.go
Original file line number Diff line number Diff line change
Expand Up @@ -133,10 +133,9 @@ func NewEthClient(tc tests.TestContext, nodeURI tmpnet.NodeURI) ethclient.Client
}

// Adds an ephemeral node intended to be used by a single test.
func AddEphemeralNode(tc tests.TestContext, network *tmpnet.Network, flags tmpnet.FlagsMap) *tmpnet.Node {
func AddEphemeralNode(tc tests.TestContext, network *tmpnet.Network, node *tmpnet.Node) *tmpnet.Node {
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The need to be able to set the DataDir suggested accepting a node instead of a FlagsMap here.

require := require.New(tc)

node := tmpnet.NewEphemeralNode(flags)
require.NoError(network.StartNode(tc.DefaultContext(), tc.Log(), node))

tc.DeferCleanup(func() {
Expand Down
61 changes: 24 additions & 37 deletions tests/fixture/tmpnet/network.go
Original file line number Diff line number Diff line change
Expand Up @@ -239,13 +239,6 @@ func (n *Network) EnsureDefaultConfig(log logging.Logger) error {
n.PrimaryChainConfigs[alias].SetDefaults(chainConfig)
}

// Ensure nodes are configured
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Discovered during testing that this configuration is redundant: it is already performed on network bootstrap, node start, and node read.

for i := range n.Nodes {
if err := n.EnsureNodeConfig(n.Nodes[i]); err != nil {
return err
}
}

return nil
}

Expand Down Expand Up @@ -503,23 +496,22 @@ func (n *Network) RestartNode(ctx context.Context, log logging.Logger, node *Nod

// Stops all nodes in the network.
func (n *Network) Stop(ctx context.Context) error {
// Target all nodes, including the ephemeral ones
nodes, err := ReadNodes(n, true /* includeEphemeral */)
if err != nil {
// Ensure the node state is up-to-date
if err := n.readNodes(); err != nil {
return err
}

var errs []error

// Initiate stop on all nodes
for _, node := range nodes {
for _, node := range n.Nodes {
if err := node.InitiateStop(ctx); err != nil {
errs = append(errs, fmt.Errorf("failed to stop node %s: %w", node.NodeID, err))
}
}

// Wait for stop to complete on all nodes
for _, node := range nodes {
for _, node := range n.Nodes {
if err := node.WaitForStopped(ctx); err != nil {
errs = append(errs, fmt.Errorf("failed to wait for node %s to stop: %w", node.NodeID, err))
}
Expand All @@ -543,8 +535,7 @@ func (n *Network) Restart(ctx context.Context, log logging.Logger) error {
}

// Ensures the provided node has the configuration it needs to start. If the data dir is not
// set, it will be defaulted to [nodeParentDir]/[node ID]. For a not-yet-created network,
// no action will be taken.
// set, it will be defaulted to [nodeParentDir]/[node ID].
func (n *Network) EnsureNodeConfig(node *Node) error {
// Ensure the node has access to network configuration
node.network = n
Expand All @@ -553,14 +544,9 @@ func (n *Network) EnsureNodeConfig(node *Node) error {
return err
}

if len(n.Dir) > 0 {
// Ensure the node's data dir is configured
dataDir := node.GetDataDir()
if len(dataDir) == 0 {
// NodeID will have been set by EnsureKeys
dataDir = filepath.Join(n.Dir, node.NodeID.String())
node.Flags[config.DataDirKey] = dataDir
}
// Ensure a data directory if not already set
if len(node.DataDir) == 0 {
node.DataDir = filepath.Join(n.Dir, node.NodeID.String())
}

return nil
Expand Down Expand Up @@ -767,16 +753,13 @@ func (n *Network) GetNodeURIs() []NodeURI {
// collecting the bootstrap details for restarting a node).
// For consumption outside of avalanchego. Needs to be kept exported.
func (n *Network) GetBootstrapIPsAndIDs(skippedNode *Node) ([]string, []string, error) {
// Collect staking addresses of non-ephemeral nodes for use in bootstrapping a node
nodes, err := ReadNodes(n, false /* includeEphemeral */)
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't think there's much value in forcing a read here. The network can always be reloaded to achieve a similar result.

if err != nil {
return nil, nil, fmt.Errorf("failed to read network's nodes: %w", err)
}
var (
bootstrapIPs = make([]string, 0, len(nodes))
bootstrapIDs = make([]string, 0, len(nodes))
)
for _, node := range nodes {
bootstrapIPs := []string{}
bootstrapIDs := []string{}
for _, node := range n.Nodes {
if node.IsEphemeral {
// Ephemeral nodes are not guaranteed to stay running
continue
}
if skippedNode != nil && node.NodeID == skippedNode.NodeID {
continue
}
Expand Down Expand Up @@ -934,12 +917,16 @@ func (n *Network) writeNodeFlags(log logging.Logger, node *Node) error {
// Only configure the plugin dir with a non-empty value to ensure the use of
// the default value (`[datadir]/plugins`) when no plugin dir is configured.
processConfig := node.getRuntimeConfig().Process
if processConfig != nil && len(processConfig.PluginDir) > 0 {
// Ensure the plugin directory exists or the node will fail to start
if err := os.MkdirAll(processConfig.PluginDir, perms.ReadWriteExecute); err != nil {
return fmt.Errorf("failed to create plugin dir: %w", err)
if processConfig != nil {
if len(processConfig.PluginDir) > 0 {
// Ensure the plugin directory exists or the node will fail to start
if err := os.MkdirAll(processConfig.PluginDir, perms.ReadWriteExecute); err != nil {
return fmt.Errorf("failed to create plugin dir: %w", err)
}
flags.SetDefault(config.PluginDirKey, processConfig.PluginDir)
}
flags.SetDefault(config.PluginDirKey, processConfig.PluginDir)

flags.SetDefault(config.DataDirKey, node.DataDir)
Copy link
Contributor Author

@maru-ava maru-ava Apr 11, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The data dir default is only set here for the process runtime. The kube runtime will set its own data dir since it uses a different filesystem.

}

// Set the network and tmpnet defaults last to ensure they can be overridden
Expand Down
42 changes: 32 additions & 10 deletions tests/fixture/tmpnet/network_config.go
Original file line number Diff line number Diff line change
Expand Up @@ -56,13 +56,35 @@ func (n *Network) readNetwork() error {
return n.readConfig()
}

// Read the non-ephemeral nodes associated with the network from disk.
// Read the nodes associated with the network from disk.
func (n *Network) readNodes() error {
nodes, err := ReadNodes(n, false /* includeEphemeral */)
nodes := []*Node{}

// Node configuration is stored in child directories
entries, err := os.ReadDir(n.Dir)
if err != nil {
return err
return fmt.Errorf("failed to read dir: %w", err)
}
for _, entry := range entries {
if !entry.IsDir() {
continue
}

node := NewNode()
dataDir := filepath.Join(n.Dir, entry.Name())
err := node.Read(n, dataDir)
if errors.Is(err, os.ErrNotExist) {
// If no config file exists, assume this is not the path of a node
continue
} else if err != nil {
return err
}

nodes = append(nodes, node)
}

n.Nodes = nodes

return nil
}

Expand Down Expand Up @@ -128,13 +150,13 @@ func (n *Network) readConfig() error {

// The subset of network fields to store in the network config file.
type serializedNetworkConfig struct {
UUID string `json:",omitempty"`
Owner string `json:",omitempty"`
PrimarySubnetConfig FlagsMap `json:",omitempty"`
PrimaryChainConfigs map[string]FlagsMap `json:",omitempty"`
DefaultFlags FlagsMap `json:",omitempty"`
DefaultRuntimeConfig NodeRuntimeConfig `json:",omitempty"`
PreFundedKeys []*secp256k1.PrivateKey `json:",omitempty"`
UUID string `json:"uuid,omitempty"`
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

While reviewing the serialized JSON, I realized that the field names should all be in lowerCamelCase.

Owner string `json:"owner,omitempty"`
PrimarySubnetConfig FlagsMap `json:"primarySubnetConfig,omitempty"`
PrimaryChainConfigs map[string]FlagsMap `json:"primaryChainConfigs,omitempty"`
DefaultFlags FlagsMap `json:"defaultFlags,omitempty"`
DefaultRuntimeConfig NodeRuntimeConfig `json:"defaultRuntimeConfig,omitempty"`
PreFundedKeys []*secp256k1.PrivateKey `json:"preFundedKeys,omitempty"`
}

func (n *Network) writeNetworkConfig() error {
Expand Down
Loading