From 9ad5aed42721b7c337fa09f84c5e1263df0d08dd Mon Sep 17 00:00:00 2001 From: ramin Date: Thu, 4 Jul 2024 15:57:42 +0100 Subject: [PATCH 1/8] modify values --- share/p2p/discovery/discovery.go | 16 +++++++------- share/p2p/discovery/discovery_test.go | 2 +- share/p2p/peers/manager_test.go | 30 +++++++++++++++++---------- 3 files changed, 27 insertions(+), 21 deletions(-) diff --git a/share/p2p/discovery/discovery.go b/share/p2p/discovery/discovery.go index fe99815d94..0849985368 100644 --- a/share/p2p/discovery/discovery.go +++ b/share/p2p/discovery/discovery.go @@ -28,16 +28,17 @@ const ( // findPeersTimeout limits the FindPeers operation in time findPeersTimeout = time.Minute - // retryTimeout defines time interval between discovery and advertise attempts. - retryTimeout = time.Second + // advertiseRetryTimeout defines time interval between advertise attempts. + advertiseRetryTimeout = time.Second * 1 // logInterval defines the time interval at which a warning message will be logged // if the desired number of nodes is not detected. logInterval = 5 * time.Minute ) -// discoveryRetryTimeout defines time interval between discovery attempts, needed for tests -var discoveryRetryTimeout = retryTimeout +// discoveryRetryTimeout defines time interval between discovery attempts +// this is set independently for tests in discover_test.go +var DiscoveryRetryTimeout = advertiseRetryTimeout * 60 // Discovery combines advertise and discover services and allows to store discovered nodes. // TODO: The code here gets horribly hairy, so we should refactor this at some point @@ -181,16 +182,13 @@ func (d *Discovery) Advertise(ctx context.Context) { // we don't want retry indefinitely in busy loop // internal discovery mechanism may need some time before attempts - errTimer := time.NewTimer(retryTimeout) select { - case <-errTimer.C: - errTimer.Stop() + case <-time.After(advertiseRetryTimeout): if !timer.Stop() { <-timer.C } continue case <-ctx.Done(): - errTimer.Stop() return } } @@ -212,7 +210,7 @@ func (d *Discovery) Advertise(ctx context.Context) { // It initiates peer discovery upon request and restarts the process until the soft limit is // reached. func (d *Discovery) discoveryLoop(ctx context.Context) { - t := time.NewTicker(discoveryRetryTimeout) + t := time.NewTicker(DiscoveryRetryTimeout) defer t.Stop() warnTicker := time.NewTicker(logInterval) diff --git a/share/p2p/discovery/discovery_test.go b/share/p2p/discovery/discovery_test.go index 8214a2bbe0..e4422b1c14 100644 --- a/share/p2p/discovery/discovery_test.go +++ b/share/p2p/discovery/discovery_test.go @@ -26,7 +26,7 @@ const ( func TestDiscovery(t *testing.T) { const nodes = 10 // higher number brings higher coverage - discoveryRetryTimeout = time.Millisecond * 100 // defined in discovery.go + DiscoveryRetryTimeout = time.Millisecond * 100 // defined in discovery.go ctx, cancel := context.WithTimeout(context.Background(), time.Second*20) t.Cleanup(cancel) diff --git a/share/p2p/peers/manager_test.go b/share/p2p/peers/manager_test.go index 2a465dc59a..524acf33b6 100644 --- a/share/p2p/peers/manager_test.go +++ b/share/p2p/peers/manager_test.go @@ -26,9 +26,13 @@ import ( "github.com/celestiaorg/celestia-node/share/p2p/shrexsub" ) +const ( + defaultTimeout = time.Second * 5 +) + func TestManager(t *testing.T) { t.Run("Validate pool by headerSub", func(t *testing.T) { - ctx, cancel := context.WithTimeout(context.Background(), time.Second*5) + ctx, cancel := context.WithTimeout(context.Background(), defaultTimeout) t.Cleanup(cancel) // create headerSub mock @@ -49,7 +53,7 @@ func TestManager(t *testing.T) { }) t.Run("Validate pool by shrex.Getter", func(t *testing.T) { - ctx, cancel := context.WithTimeout(context.Background(), time.Second*5) + ctx, cancel := context.WithTimeout(context.Background(), defaultTimeout) t.Cleanup(cancel) h := testHeader() @@ -72,7 +76,7 @@ func TestManager(t *testing.T) { }) t.Run("validator", func(t *testing.T) { - ctx, cancel := context.WithTimeout(context.Background(), time.Second*5) + ctx, cancel := context.WithTimeout(context.Background(), defaultTimeout) t.Cleanup(cancel) // create headerSub mock @@ -108,7 +112,7 @@ func TestManager(t *testing.T) { }) t.Run("cleanup", func(t *testing.T) { - ctx, cancel := context.WithTimeout(context.Background(), time.Second*5) + ctx, cancel := context.WithTimeout(context.Background(), defaultTimeout) t.Cleanup(cancel) // create headerSub mock @@ -153,7 +157,7 @@ func TestManager(t *testing.T) { }) t.Run("no peers from shrex.Sub, get from discovery", func(t *testing.T) { - ctx, cancel := context.WithTimeout(context.Background(), time.Second*5) + ctx, cancel := context.WithTimeout(context.Background(), defaultTimeout) t.Cleanup(cancel) // create headerSub mock @@ -176,7 +180,7 @@ func TestManager(t *testing.T) { }) t.Run("no peers from shrex.Sub and from discovery. Wait", func(t *testing.T) { - ctx, cancel := context.WithTimeout(context.Background(), time.Second*5) + ctx, cancel := context.WithTimeout(context.Background(), defaultTimeout) t.Cleanup(cancel) // create headerSub mock @@ -218,7 +222,7 @@ func TestManager(t *testing.T) { }) t.Run("shrexSub sends a message lower than first headerSub header height, headerSub first", func(t *testing.T) { - ctx, cancel := context.WithTimeout(context.Background(), time.Second*5) + ctx, cancel := context.WithTimeout(context.Background(), defaultTimeout) t.Cleanup(cancel) h := testHeader() @@ -251,7 +255,7 @@ func TestManager(t *testing.T) { }) t.Run("shrexSub sends a message lower than first headerSub header height, shrexSub first", func(t *testing.T) { - ctx, cancel := context.WithTimeout(context.Background(), time.Second*5) + ctx, cancel := context.WithTimeout(context.Background(), defaultTimeout) t.Cleanup(cancel) h := testHeader() @@ -289,7 +293,7 @@ func TestManager(t *testing.T) { }) t.Run("pools store window", func(t *testing.T) { - ctx, cancel := context.WithTimeout(context.Background(), time.Second*5) + ctx, cancel := context.WithTimeout(context.Background(), defaultTimeout) t.Cleanup(cancel) h := testHeader() @@ -322,7 +326,7 @@ func TestIntegration(t *testing.T) { t.Run("get peer from shrexsub", func(t *testing.T) { nw, err := mocknet.FullMeshLinked(2) require.NoError(t, err) - ctx, cancel := context.WithTimeout(context.Background(), time.Second*5) + ctx, cancel := context.WithTimeout(context.Background(), defaultTimeout) t.Cleanup(cancel) bnPubSub, err := shrexsub.NewPubSub(ctx, nw.Hosts()[0], "test") @@ -365,7 +369,7 @@ func TestIntegration(t *testing.T) { fullNodesTag := "fullNodes" nw, err := mocknet.FullMeshConnected(3) require.NoError(t, err) - ctx, cancel := context.WithTimeout(context.Background(), time.Second*10) + ctx, cancel := context.WithTimeout(context.Background(), defaultTimeout) t.Cleanup(cancel) // set up bootstrapper @@ -391,6 +395,9 @@ func TestIntegration(t *testing.T) { bnRouter, err := dht.New(ctx, bnHost, opts...) require.NoError(t, err) + // hack + discovery.DiscoveryRetryTimeout = time.Millisecond * 100 + params := discovery.DefaultParameters() params.AdvertiseInterval = time.Second @@ -450,6 +457,7 @@ func TestIntegration(t *testing.T) { go bnDisc.Advertise(ctx) select { + case <-waitCh: require.Contains(t, fnPeerManager.nodes.peersList, bnHost.ID()) case <-ctx.Done(): From 5f617f7304b3c3aa74f906069bb316674d83e9e5 Mon Sep 17 00:00:00 2001 From: ramin Date: Wed, 10 Jul 2024 14:56:17 +0100 Subject: [PATCH 2/8] move some timeout options to Params so we can change params, but also modify in test environments in a sane way --- share/p2p/discovery/discovery.go | 11 +--- share/p2p/discovery/discovery_test.go | 6 +-- share/p2p/discovery/options.go | 36 ++++++++++++- share/p2p/discovery/options_test.go | 78 +++++++++++++++++++++++++++ share/p2p/peers/manager_test.go | 6 +-- 5 files changed, 118 insertions(+), 19 deletions(-) create mode 100644 share/p2p/discovery/options_test.go diff --git a/share/p2p/discovery/discovery.go b/share/p2p/discovery/discovery.go index 0849985368..592ea4f8dd 100644 --- a/share/p2p/discovery/discovery.go +++ b/share/p2p/discovery/discovery.go @@ -28,18 +28,11 @@ const ( // findPeersTimeout limits the FindPeers operation in time findPeersTimeout = time.Minute - // advertiseRetryTimeout defines time interval between advertise attempts. - advertiseRetryTimeout = time.Second * 1 - // logInterval defines the time interval at which a warning message will be logged // if the desired number of nodes is not detected. logInterval = 5 * time.Minute ) -// discoveryRetryTimeout defines time interval between discovery attempts -// this is set independently for tests in discover_test.go -var DiscoveryRetryTimeout = advertiseRetryTimeout * 60 - // Discovery combines advertise and discover services and allows to store discovered nodes. // TODO: The code here gets horribly hairy, so we should refactor this at some point type Discovery struct { @@ -183,7 +176,7 @@ func (d *Discovery) Advertise(ctx context.Context) { // we don't want retry indefinitely in busy loop // internal discovery mechanism may need some time before attempts select { - case <-time.After(advertiseRetryTimeout): + case <-time.After(d.params.AdvertiseRetryTimeout): if !timer.Stop() { <-timer.C } @@ -210,7 +203,7 @@ func (d *Discovery) Advertise(ctx context.Context) { // It initiates peer discovery upon request and restarts the process until the soft limit is // reached. func (d *Discovery) discoveryLoop(ctx context.Context) { - t := time.NewTicker(DiscoveryRetryTimeout) + t := time.NewTicker(d.params.DiscoveryRetryTimeout) defer t.Stop() warnTicker := time.NewTicker(logInterval) diff --git a/share/p2p/discovery/discovery_test.go b/share/p2p/discovery/discovery_test.go index e4422b1c14..88cf0ec08e 100644 --- a/share/p2p/discovery/discovery_test.go +++ b/share/p2p/discovery/discovery_test.go @@ -26,8 +26,6 @@ const ( func TestDiscovery(t *testing.T) { const nodes = 10 // higher number brings higher coverage - DiscoveryRetryTimeout = time.Millisecond * 100 // defined in discovery.go - ctx, cancel := context.WithTimeout(context.Background(), time.Second*20) t.Cleanup(cancel) @@ -43,7 +41,7 @@ func TestDiscovery(t *testing.T) { } host, routingDisc := tn.peer() - params := DefaultParameters() + params := TestParameters() params.PeersLimit = nodes // start discovery listener service for peerA @@ -103,7 +101,7 @@ func TestDiscoveryTagged(t *testing.T) { // sub will discover both peers, but on different tags sub, routingDisc := tn.peer() - params := DefaultParameters() + params := TestParameters() // create 2 discovery services for sub, each with a different tag done1 := make(chan struct{}) diff --git a/share/p2p/discovery/options.go b/share/p2p/discovery/options.go index 8515bcbe00..b9dd9bbdcd 100644 --- a/share/p2p/discovery/options.go +++ b/share/p2p/discovery/options.go @@ -7,6 +7,11 @@ import ( "github.com/libp2p/go-libp2p/core/peer" ) +const ( + defaultAdvertiseRetryTimeout = time.Second + defaultDiscoveryRetryTimeout = defaultAdvertiseRetryTimeout * 60 +) + // Parameters is the set of Parameters that must be configured for the Discovery module type Parameters struct { // PeersLimit defines the soft limit of FNs to connect to via discovery. @@ -15,6 +20,13 @@ type Parameters struct { // AdvertiseInterval is a interval between advertising sessions. // NOTE: only full and bridge can advertise themselves. AdvertiseInterval time.Duration + + // advertiseRetryTimeout defines time interval between advertise attempts. + AdvertiseRetryTimeout time.Duration + + // DiscoveryRetryTimeout defines time interval between discovery attempts + // this is set independently for tests in discover_test.go + DiscoveryRetryTimeout time.Duration } // options is the set of options that can be configured for the Discovery module @@ -33,13 +45,33 @@ type Option func(*options) // for the Discovery module func DefaultParameters() *Parameters { return &Parameters{ - PeersLimit: 5, - AdvertiseInterval: time.Hour, + PeersLimit: 5, + AdvertiseInterval: time.Hour, + AdvertiseRetryTimeout: defaultAdvertiseRetryTimeout, + DiscoveryRetryTimeout: defaultDiscoveryRetryTimeout, } } +// TestParameters returns the default Parameters' configuration values +// for the Discovery module, with some changes for configuration +// during tests +func TestParameters() *Parameters { + p := DefaultParameters() + p.AdvertiseInterval = time.Second * 1 + p.DiscoveryRetryTimeout = time.Millisecond * 50 + return p +} + // Validate validates the values in Parameters func (p *Parameters) Validate() error { + if p.AdvertiseRetryTimeout <= 0 { + return fmt.Errorf("discovery: advertise retry timeout cannot be zero or negative") + } + + if p.DiscoveryRetryTimeout <= 0 { + return fmt.Errorf("discovery: discovery retry timeout cannot be zero or negative") + } + if p.PeersLimit <= 0 { return fmt.Errorf("discovery: peers limit cannot be zero or negative") } diff --git a/share/p2p/discovery/options_test.go b/share/p2p/discovery/options_test.go new file mode 100644 index 0000000000..b4aa40a55d --- /dev/null +++ b/share/p2p/discovery/options_test.go @@ -0,0 +1,78 @@ +package discovery + +import ( + "testing" + "time" + + "github.com/stretchr/testify/require" +) + +func TestValidate(t *testing.T) { + tests := []struct { + name string + params Parameters + wantErr bool + }{ + { + name: "valid parameters", + params: Parameters{ + PeersLimit: 5, + AdvertiseInterval: time.Hour, + AdvertiseRetryTimeout: time.Second, + DiscoveryRetryTimeout: time.Minute, + }, + wantErr: false, + }, + { + name: "negative PeersLimit", + params: Parameters{ + PeersLimit: 0, + AdvertiseInterval: time.Hour, + AdvertiseRetryTimeout: time.Second, + DiscoveryRetryTimeout: time.Minute, + }, + wantErr: true, + }, + { + name: "negative AdvertiseInterval", + params: Parameters{ + PeersLimit: 5, + AdvertiseInterval: -time.Hour, + AdvertiseRetryTimeout: time.Second, + DiscoveryRetryTimeout: time.Minute, + }, + wantErr: true, + }, + { + name: "negative AdvertiseRetryTimeout", + params: Parameters{ + PeersLimit: 5, + AdvertiseInterval: time.Hour, + AdvertiseRetryTimeout: -time.Second, + DiscoveryRetryTimeout: time.Minute, + }, + wantErr: true, + }, + { + name: "negative DiscoveryRetryTimeout", + params: Parameters{ + PeersLimit: 5, + AdvertiseInterval: time.Hour, + AdvertiseRetryTimeout: time.Second, + DiscoveryRetryTimeout: -time.Minute, + }, + wantErr: true, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + err := tt.params.Validate() + if tt.wantErr { + require.Error(t, err) + } else { + require.NoError(t, err) + } + }) + } +} diff --git a/share/p2p/peers/manager_test.go b/share/p2p/peers/manager_test.go index 524acf33b6..ece4e70cab 100644 --- a/share/p2p/peers/manager_test.go +++ b/share/p2p/peers/manager_test.go @@ -395,11 +395,9 @@ func TestIntegration(t *testing.T) { bnRouter, err := dht.New(ctx, bnHost, opts...) require.NoError(t, err) - // hack - discovery.DiscoveryRetryTimeout = time.Millisecond * 100 - params := discovery.DefaultParameters() params.AdvertiseInterval = time.Second + params.DiscoveryRetryTimeout = time.Millisecond * 100 bnDisc, err := discovery.NewDiscovery( params, @@ -433,7 +431,7 @@ func TestIntegration(t *testing.T) { } // set up discovery for full node with hook to peer manager and check discovered peer - params = discovery.DefaultParameters() + params = discovery.TestParameters() params.AdvertiseInterval = time.Second params.PeersLimit = 10 From 8b2121282e23b6d7bb8c4cdfd197a9a5b7a66810 Mon Sep 17 00:00:00 2001 From: ramin Date: Wed, 10 Jul 2024 21:03:43 +0100 Subject: [PATCH 3/8] trigger ci on branch push --- .github/workflows/ci_release.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/ci_release.yml b/.github/workflows/ci_release.yml index 8efcf094b9..a3d9f3489e 100644 --- a/.github/workflows/ci_release.yml +++ b/.github/workflows/ci_release.yml @@ -4,6 +4,7 @@ on: push: branches: - main + - fix/ramin/reduce-discovery-retries release: types: [published] pull_request: From 837305b6497832bb72ba4cc7f363f8840b52aaf4 Mon Sep 17 00:00:00 2001 From: ramin Date: Wed, 10 Jul 2024 21:29:09 +0100 Subject: [PATCH 4/8] fix lint --- share/p2p/peers/manager_test.go | 1 - 1 file changed, 1 deletion(-) diff --git a/share/p2p/peers/manager_test.go b/share/p2p/peers/manager_test.go index ece4e70cab..873c3cd7e3 100644 --- a/share/p2p/peers/manager_test.go +++ b/share/p2p/peers/manager_test.go @@ -455,7 +455,6 @@ func TestIntegration(t *testing.T) { go bnDisc.Advertise(ctx) select { - case <-waitCh: require.Contains(t, fnPeerManager.nodes.peersList, bnHost.ID()) case <-ctx.Done(): From 128d99b9bd07a5f1824c2f159dea50c0e6c7fa22 Mon Sep 17 00:00:00 2001 From: ramin Date: Thu, 11 Jul 2024 10:14:32 +0100 Subject: [PATCH 5/8] remove test branch trigger condition --- .github/workflows/ci_release.yml | 1 - 1 file changed, 1 deletion(-) diff --git a/.github/workflows/ci_release.yml b/.github/workflows/ci_release.yml index a3d9f3489e..8efcf094b9 100644 --- a/.github/workflows/ci_release.yml +++ b/.github/workflows/ci_release.yml @@ -4,7 +4,6 @@ on: push: branches: - main - - fix/ramin/reduce-discovery-retries release: types: [published] pull_request: From 55775a3b982c70565b436b29526eb2f261415a3a Mon Sep 17 00:00:00 2001 From: ramin Date: Thu, 11 Jul 2024 10:57:07 +0100 Subject: [PATCH 6/8] update name in comment --- share/p2p/discovery/options.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/share/p2p/discovery/options.go b/share/p2p/discovery/options.go index b9dd9bbdcd..0a8fd1f214 100644 --- a/share/p2p/discovery/options.go +++ b/share/p2p/discovery/options.go @@ -21,7 +21,7 @@ type Parameters struct { // NOTE: only full and bridge can advertise themselves. AdvertiseInterval time.Duration - // advertiseRetryTimeout defines time interval between advertise attempts. + // AdvertiseRetryTimeout defines time interval between advertise attempts. AdvertiseRetryTimeout time.Duration // DiscoveryRetryTimeout defines time interval between discovery attempts From d21b4d828baec90cbcecc4d9cfb07acc04e11528 Mon Sep 17 00:00:00 2001 From: ramin Date: Thu, 18 Jul 2024 09:51:55 +0100 Subject: [PATCH 7/8] move defaults to struct --- share/p2p/discovery/options.go | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/share/p2p/discovery/options.go b/share/p2p/discovery/options.go index 0a8fd1f214..c65342a0d4 100644 --- a/share/p2p/discovery/options.go +++ b/share/p2p/discovery/options.go @@ -7,11 +7,6 @@ import ( "github.com/libp2p/go-libp2p/core/peer" ) -const ( - defaultAdvertiseRetryTimeout = time.Second - defaultDiscoveryRetryTimeout = defaultAdvertiseRetryTimeout * 60 -) - // Parameters is the set of Parameters that must be configured for the Discovery module type Parameters struct { // PeersLimit defines the soft limit of FNs to connect to via discovery. @@ -47,8 +42,8 @@ func DefaultParameters() *Parameters { return &Parameters{ PeersLimit: 5, AdvertiseInterval: time.Hour, - AdvertiseRetryTimeout: defaultAdvertiseRetryTimeout, - DiscoveryRetryTimeout: defaultDiscoveryRetryTimeout, + AdvertiseRetryTimeout: time.Second, + DiscoveryRetryTimeout: time.Second * 60, } } From ca67fef50f89af5cbbc1b4ebfebc40f733ddae92 Mon Sep 17 00:00:00 2001 From: ramin Date: Thu, 18 Jul 2024 23:24:49 +0100 Subject: [PATCH 8/8] when searching for honey, one must recognize a hornets' nest and back away if accidentally mistaken for a bees nest --- share/p2p/discovery/discovery.go | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/share/p2p/discovery/discovery.go b/share/p2p/discovery/discovery.go index 592ea4f8dd..af4905f3e5 100644 --- a/share/p2p/discovery/discovery.go +++ b/share/p2p/discovery/discovery.go @@ -175,13 +175,16 @@ func (d *Discovery) Advertise(ctx context.Context) { // we don't want retry indefinitely in busy loop // internal discovery mechanism may need some time before attempts + errTimer := time.NewTimer(d.params.AdvertiseRetryTimeout) select { - case <-time.After(d.params.AdvertiseRetryTimeout): + case <-errTimer.C: + errTimer.Stop() if !timer.Stop() { <-timer.C } continue case <-ctx.Done(): + errTimer.Stop() return } }