Skip to content

Commit

Permalink
Improve Trantor tests (#538)
Browse files Browse the repository at this point in the history
* Fix libp2p transport parameters

For some reason, if a node keeps retrying to connect to another
node too eagerly, the connection never succeeds.
Increasing the retry period from 1 to 3 seconds solves the issue.

* Prolong tests for Trantor with libp2p transport

* Fix cleanup after Trantor tests, add an option to skip individual tests

* Use constant for sim transport (for linting)

Signed-off-by: Matej Pavlovic <[email protected]>
  • Loading branch information
matejpavlovic authored Jan 21, 2024
1 parent 6a2a42c commit d45c300
Show file tree
Hide file tree
Showing 3 changed files with 67 additions and 39 deletions.
4 changes: 2 additions & 2 deletions pkg/net/libp2p/params.go
Original file line number Diff line number Diff line change
Expand Up @@ -30,9 +30,9 @@ func DefaultParams() Params {
return Params{
ProtocolID: "/mir/0.0.1",
ConnectionTTL: DefaultConnectionTTL,
ConnectionBufferSize: 128,
ConnectionBufferSize: 512,
StreamWriteTimeout: 100 * time.Millisecond,
ReconnectionPeriod: time.Second,
ReconnectionPeriod: 3 * time.Second,
MaxDataPerWrite: 100 * 1024, // 100 kiB
MinComplainPeriod: time.Second,
}
Expand Down
13 changes: 9 additions & 4 deletions pkg/net/libp2p/transport_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -263,7 +263,12 @@ func (m *mockLibp2pCommunication) testEventuallyConnected(nodeID1, nodeID2 stdty
return network.Connected == n1.host.Network().Connectedness(n2.host.ID()) &&
m.streamExist(n1, n2) && m.streamExist(n2, n1)
},
15*time.Second, 100*time.Millisecond)
60*time.Second, 200*time.Millisecond)
// One minute seems excessive, but we observed cases where connection attempts kept failing
// for over 30 seconds.
// TODO: Investigate the root cause of these slow connection attempts.
// They only started occurring after updating quic-go from 0.33.0 to 0.39.4,
// as part of a libp2p update from 0.27.8 to 0.32.2.
}

func (m *mockLibp2pCommunication) testConnectionsEmpty() {
Expand Down Expand Up @@ -654,9 +659,9 @@ func TestMessaging(t *testing.T) {
received := 0
disconnect := make(chan struct{})

testTimeDuration := time.Duration(15)
testTimer := time.NewTimer(testTimeDuration * time.Second)
disconnectTimer := time.NewTimer(testTimeDuration / 3 * time.Second)
testTimeDuration := 15 * time.Second
testTimer := time.NewTimer(testTimeDuration)
disconnectTimer := time.NewTimer(testTimeDuration / 3)

wg.Add(1)
go func() {
Expand Down
89 changes: 56 additions & 33 deletions pkg/trantor/testing/smr_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ import (
"os"
"path/filepath"
"strconv"
"sync"
"testing"
"time"

Expand Down Expand Up @@ -37,7 +38,8 @@ import (
)

const (
failedTestDir = "failed-test-data"
failedTestDir = "failed-test-data"
simTransportName = "sim"
)

func TestIntegration(t *testing.T) {
Expand Down Expand Up @@ -74,6 +76,7 @@ type TestConfig struct {
CrashedReplicas map[int]bool
CheckFunc func(tb testing.TB, deployment *deploytest.Deployment, conf *TestConfig)
Logger logging.Logger
Skip bool // If set to true, test is skipped.
}

func testIntegrationWithISS(tt *testing.T) {
Expand Down Expand Up @@ -117,15 +120,15 @@ func testIntegrationWithISS(tt *testing.T) {
NumClients: 1,
Transport: "libp2p",
NumFakeTXs: 10,
Duration: 10 * time.Second,
Duration: 60 * time.Second,
}},
5: {"Submit 10 transactions with 1 node and libp2p networking",
&TestConfig{
NodeIDsWeight: deploytest.NewNodeIDsDefaultWeights(1),
NumClients: 1,
Transport: "libp2p",
NumNetTXs: 10,
Duration: 10 * time.Second,
Duration: 20 * time.Second,
}},
6: {"Submit 10 transactions with 4 nodes and libp2p networking",
&TestConfig{
Expand All @@ -134,25 +137,25 @@ func testIntegrationWithISS(tt *testing.T) {
NumClients: 1,
Transport: "libp2p",
NumNetTXs: 10,
Duration: 15 * time.Second,
Duration: 60 * time.Second,
}},
7: {"Do nothing with 1 node in simulation",
&TestConfig{
NodeIDsWeight: deploytest.NewNodeIDsDefaultWeights(1),
Transport: "sim",
Transport: simTransportName,
Duration: 4 * time.Second,
}},
8: {"Do nothing with 4 nodes in simulation, one of them slow",
&TestConfig{
NodeIDsWeight: deploytest.NewNodeIDsDefaultWeights(4),
Transport: "sim",
Transport: simTransportName,
Duration: 20 * time.Second,
SlowProposeReplicas: map[int]bool{0: true},
}},
9: {"Submit 10 fake transactions with 1 node in simulation",
&TestConfig{
NodeIDsWeight: deploytest.NewNodeIDsDefaultWeights(1),
Transport: "sim",
Transport: simTransportName,
NumFakeTXs: 10,
Directory: "mirbft-deployment-test",
Duration: 4 * time.Second,
Expand All @@ -161,7 +164,7 @@ func testIntegrationWithISS(tt *testing.T) {
&TestConfig{
NodeIDsWeight: deploytest.NewNodeIDsDefaultWeights(1),
NumClients: 1,
Transport: "sim",
Transport: simTransportName,
NumFakeTXs: 10,
Directory: "mirbft-deployment-test",
Duration: 4 * time.Second,
Expand All @@ -170,15 +173,15 @@ func testIntegrationWithISS(tt *testing.T) {
&TestConfig{
NodeIDsWeight: deploytest.NewNodeIDsDefaultWeights(1),
NumClients: 0,
Transport: "sim",
Transport: simTransportName,
NumFakeTXs: 100,
Duration: 20 * time.Second,
}},
12: {"Submit 100 fake transactions with 4 nodes in simulation, one of them slow",
&TestConfig{
NodeIDsWeight: deploytest.NewNodeIDsDefaultWeights(4),
NumClients: 0,
Transport: "sim",
Transport: simTransportName,
NumFakeTXs: 100,
Duration: 20 * time.Second,
SlowProposeReplicas: map[int]bool{0: true},
Expand All @@ -190,9 +193,9 @@ func testIntegrationWithISS(tt *testing.T) {
return types.VoteWeight(fmt.Sprintf("%d0000000000000000000", pow2(int(numericID)))) // ensures last 2 nodes weight is greater than twice the sum of the others'
}),
NumClients: 0,
Transport: "libp2p",
Transport: simTransportName,
NumFakeTXs: 100,
Duration: 20 * time.Second,
Duration: 30 * time.Second,
ErrorExpected: es.Errorf("no transactions were delivered"),
CrashedReplicas: map[int]bool{2: true, 3: true},
CheckFunc: func(tb testing.TB, deployment *deploytest.Deployment, conf *TestConfig) {
Expand All @@ -210,9 +213,9 @@ func testIntegrationWithISS(tt *testing.T) {
return types.VoteWeight(fmt.Sprintf("%d0000000000000000000", pow2(int(4-numericID)))) // ensures first 2 nodes weight is greater than twice the sum of the others'
}),
NumClients: 0,
Transport: "libp2p",
Transport: simTransportName,
NumFakeTXs: 100,
Duration: 40 * time.Second,
Duration: 60 * time.Second,
ErrorExpected: es.Errorf("no transactions were delivered"),
CrashedReplicas: map[int]bool{2: true, 3: true},
CheckFunc: func(tb testing.TB, deployment *deploytest.Deployment, conf *TestConfig) {
Expand All @@ -228,12 +231,39 @@ func testIntegrationWithISS(tt *testing.T) {
for i, test := range tests {
i, test := i, test

// Create a directory for the deployment-generated files and set the test directory name.
// The directory will be automatically removed when the outer test function exits.
createDeploymentDir(tt, test.Config)

var lock sync.Mutex
tt.Run(fmt.Sprintf("%03d", i), func(t *testing.T) {
simMode := (test.Config.Transport == "sim")

lock.Lock()
defer lock.Unlock()

if test.Config.Skip {
t.Logf("Skipping test: %s", t.Name())
return
}

defer func() {
if err := recover(); err != nil || t.Failed() {
t.Logf("Test #%03d (%s) failed", i, test.Desc)
if test.Config.Transport == simTransportName {
t.Logf("Reproduce with RANDOM_SEED=%d", test.Config.RandomSeed)
}
// Save the test data.
testRelDir, err := filepath.Rel(os.TempDir(), test.Config.Directory)
require.NoError(t, err)
retainedDir := filepath.Join(failedTestDir, testRelDir)

t.Logf("Saving deployment data to: %s\n", retainedDir)
err = copy.Copy(test.Config.Directory, retainedDir)
require.NoError(t, err)
}
}()

// Create a directory for the deployment-generated files and set the test directory name.
// The directory is removed automatically when this subtest finishes, since the cleanup is registered on t.
createDeploymentDir(t, test.Config)

simMode := test.Config.Transport == simTransportName
if testing.Short() && !simMode {
t.SkipNow()
}
Expand All @@ -250,13 +280,6 @@ func testIntegrationWithISS(tt *testing.T) {
}

runIntegrationWithISSConfig(t, test.Config)

if t.Failed() {
t.Logf("Test #%03d (%s) failed", i, test.Desc)
if simMode {
t.Logf("Reproduce with RANDOM_SEED=%d", test.Config.RandomSeed)
}
}
})
}
}
Expand Down Expand Up @@ -390,23 +413,23 @@ func createDeploymentDir(tb testing.TB, conf *TestConfig) {

if conf.Directory != "" {
conf.Directory = filepath.Join(os.TempDir(), conf.Directory)
tb.Logf("Using deployment dir: %s\n", conf.Directory)
err := os.MkdirAll(conf.Directory, 0777)
require.NoError(tb, err)
tb.Cleanup(func() { os.RemoveAll(conf.Directory) })
} else {
// If no directory is configured, create a temporary directory in the OS-default location.
conf.Directory = tb.TempDir()
tb.Logf("Created temp dir: %s\n", conf.Directory)
conf.Directory = filepath.Join(tb.TempDir(), tb.Name())
}

tb.Logf("Using deployment dir: %s\n", conf.Directory)
err := os.MkdirAll(conf.Directory, 0777)
require.NoError(tb, err)
tb.Cleanup(func() { os.RemoveAll(conf.Directory) })
}

func newDeployment(conf *TestConfig) (*deploytest.Deployment, error) {
nodeIDs := maputil.GetSortedKeys(conf.NodeIDsWeight)
logger := deploytest.NewLogger(conf.Logger)

var simulation *deploytest.Simulation
if conf.Transport == "sim" {
if conf.Transport == simTransportName {
r := rand.New(rand.NewSource(conf.RandomSeed)) // nolint: gosec
eventDelayFn := func(e stdtypes.Event) time.Duration {
// TODO: Make min and max event processing delay configurable
Expand Down

0 comments on commit d45c300

Please sign in to comment.