Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

tests: flaky tests fixes #5487

Merged
merged 3 commits into from
Jun 22, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -163,7 +163,7 @@ ALWAYS:
# build our fork of libsodium, placing artifacts into crypto/lib/ and crypto/include/
crypto/libs/$(OS_TYPE)/$(ARCH)/lib/libsodium.a:
mkdir -p crypto/copies/$(OS_TYPE)/$(ARCH)
cp -R crypto/libsodium-fork crypto/copies/$(OS_TYPE)/$(ARCH)/libsodium-fork
cp -R crypto/libsodium-fork/. crypto/copies/$(OS_TYPE)/$(ARCH)/libsodium-fork
algorandskiy marked this conversation as resolved.
Show resolved Hide resolved
cd crypto/copies/$(OS_TYPE)/$(ARCH)/libsodium-fork && \
./autogen.sh --prefix $(SRCPATH)/crypto/libs/$(OS_TYPE)/$(ARCH) && \
./configure --disable-shared --prefix="$(SRCPATH)/crypto/libs/$(OS_TYPE)/$(ARCH)" && \
Expand Down
45 changes: 45 additions & 0 deletions data/ledger_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,9 @@ package data
import (
"context"
"fmt"
"regexp"
"strconv"
"strings"
"sync"
"testing"

Expand Down Expand Up @@ -625,10 +628,52 @@ func TestLedgerErrorValidate(t *testing.T) {
for more {
select {
case err := <-errChan:
if strings.Contains(err.Error(), "before dbRound") {
// handle race eval errors like "round 1933 before dbRound 1934"
// see explanation in unexpectedMessages
re := regexp.MustCompile(`round (\d+) before dbRound (\d+)`)
result := re.FindStringSubmatch(err.Error())
require.NotNil(t, result)
require.Len(t, result, 3)
evalRound, err1 := strconv.Atoi(result[1])
require.NoError(t, err1)
dbRound, err1 := strconv.Atoi(result[2])
require.NoError(t, err1)
require.GreaterOrEqual(t, int(l.Latest()), dbRound+int(cfg.MaxAcctLookback))
require.Less(t, evalRound, dbRound)
err = nil
}
require.NoError(t, err)
case <-expectedMessages:
// only debug messages should be reported
case um := <-unexpectedMessages:
if strings.Contains(um, "before dbRound") {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

    ledger_test.go:632:
        	Error Trace:	/Users/shantkarakashian/go/src/github.com/algorand/go-algorand/data/ledger_test.go:632
        	Error:      	Should be empty, but was data.EnsureBlock: could not write block 1926 to the ledger: round 1925 before dbRound 1933
        	Test:       	TestLedgerErrorValidate
        	Messages:   	data.EnsureBlock: {C} could not write block 1926 to the ledger: round 1925 before dbRound 1933
--- FAIL: TestLedgerErrorValidate (0.27s)
    ledger_test.go:628:
        	Error Trace:	/Users/shantkarakashian/go/src/github.com/algorand/go-algorand/data/ledger_test.go:628
        	Error:      	Received unexpected error:
        	            	round 0 before dbRound 2
        	Test:       	TestLedgerErrorValidate

First is coming from data/ledger.go:

363                case ledgercore.BlockInLedgerError:
364                        // The block is already in the ledger. Catchup and agreement could be competing
365                        // It is sufficient to report this as a Debug message
366                        l.log.Debugf("data.EnsureBlock: {B} could not write block %d to the ledger: %v", round, err)
367                        return
368                default:
369                        l.log.Errorf("data.EnsureBlock: {C} could not write block %d to the ledger: %v", round, err)
370                }
371
372                // If there was an error add a short delay before the next attempt.
373                time.Sleep(100 * time.Millisecond)

The second from acctupdates.go:

1403// lookupWithoutRewards returns the account data for a given address at a given round.
1404func (au *accountUpdates) lookupWithoutRewards(rnd basics.Round, addr basics.Address, synchronized bool) (data ledgercore.AccountData, validThrough basics.Round\
    , rewardsVersion protocol.ConsensusVersion, rewardsLevel uint64, err error) {
1405        needUnlock := false
1406        if synchronized {
1407                au.accountsMu.RLock()
1408                needUnlock = true
1409        }
1410        defer func() {
1411                if needUnlock {
1412                        au.accountsMu.RUnlock()
1413                }
1414        }()
1415        var offset uint64
1416        var persistedData trackerdb.PersistedAccountData
1417        for {
1418                currentDbRound := au.cachedDBRound
1419                currentDeltaLen := len(au.deltas)
1420                offset, err = au.roundOffset(rnd, "{F}")
1421                if err != nil {
1422                        return
1423                }

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

There are two places I could reproduce that send messages which break the test.
Are both of these legitimate? If so, the filter below should take care of the second case as well.
Note: {B} {C} {F} annotations are not in the code, I added them to track down where these messages are originated from.

Copy link
Contributor Author

@algorandskiy algorandskiy Jun 22, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

round 0 before dbRound 2

Handled this as well. This is from Lookup path that is part of block evaluation. The first one handled came from EnsureBlock and the second from AddBlock but because of the same reason - while AddBlock goroutine was suspended, the EnsureValidatedBlock add 6+ blocks so that ledger committed account changed and trackers advanced.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Now we uncover another error...

    ledger_test.go:677:
        	Error Trace:	/Users/shantkarakashian/go/src/github.com/algorand/go-algorand/data/ledger_test.go:677
        	Error:      	Should be empty, but was failed to delete hash '000001830087367e5228afa552f1eaf821f979a8352e527bb1e8119b761ae4598d5a5bf8' from merkle trie for account 7777777777777777777777777777777777777777777777777774MSJUVU
        	Test:       	TestLedgerErrorValidate
        	Messages:   	failed to delete hash '000001830087367e5228afa552f1eaf821f979a8352e527bb1e8119b761ae4598d5a5bf8' from merkle trie for account 7777777777777777777777777777777777777777777777777774MSJUVU

coming from catchpointtracker.go:

 952// accountsUpdateBalances applies the given compactAccountDeltas to the merkle trie
 953func (ct *catchpointTracker) accountsUpdateBalances(accountsDeltas compactAccountDeltas, resourcesDeltas compactResourcesDeltas, kvDeltas map[string]modifiedKvV$
 954        if !ct.catchpointEnabled() {
 955                return nil
 956        }
 957        accumulatedChanges := 0
 958
 959        for i := 0; i < accountsDeltas.len(); i++ {
 960                delta := accountsDeltas.getByIdx(i)
 961                if !delta.oldAcct.AccountData.IsEmpty() {
 962                        deleteHash := trackerdb.AccountHashBuilderV6(delta.address, &delta.oldAcct.AccountData, protocol.Encode(&delta.oldAcct.AccountData))
 963                        deleted, err := ct.balancesTrie.Delete(deleteHash)
 964                        if err != nil {
 965                                return fmt.Errorf("failed to delete hash '%s' from merkle trie for account %v: %w", hex.EncodeToString(deleteHash), delta.addres$
 966                        }
 967                        if !deleted {
 968                                ct.log.Errorf("failed to delete hash '%s' from merkle trie for account %v", hex.EncodeToString(deleteHash), delta.address)
 969                        } else {

// EnsureBlock might log the following:
// data.EnsureBlock: could not write block 774 to the ledger: round 773 before dbRound 774
// it happens because of simultaneous EnsureValidatedBlock and EnsureBlock calls
// that pass round check and then EnsureBlock yields after StartEvaluator.
// Meanwhile EnsureValidatedBlock finishes and adds the block to the ledger.
// After that trackersDB commit happen and account data get flushed.
// The EnsureBlock goroutine then tries to evaluate a first transaction and fails because
// the trackerDB advanced further.
// This is okay to ignore if
// - attempted round is less or equal than dbRound
// - ledger latest round is greater than dbRound + cfg.MaxAcctLookback
re := regexp.MustCompile(`could not write block (\d+) to the ledger: round (\d+) before dbRound (\d+)`)
result := re.FindStringSubmatch(um)
require.NotNil(t, result)
require.Len(t, result, 4)
attemptedRound, err := strconv.Atoi(result[1])
require.NoError(t, err)
evalRound, err := strconv.Atoi(result[2])
require.NoError(t, err)
dbRound, err := strconv.Atoi(result[3])
require.NoError(t, err)
require.Equal(t, attemptedRound, evalRound+1)
require.LessOrEqual(t, attemptedRound, dbRound)
require.GreaterOrEqual(t, int(l.Latest()), dbRound+int(cfg.MaxAcctLookback))
um = ""
}
require.Empty(t, um, um)
default:
more = false
Expand Down
16 changes: 14 additions & 2 deletions tools/network/dnssec/sort_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -32,14 +32,26 @@ func TestSrvSort(t *testing.T) {
arr := make([]*net.SRV, 0, 7)
arr = append(arr, &net.SRV{Priority: 4, Weight: 1})
arr = append(arr, &net.SRV{Priority: 3, Weight: 1})
arr = append(arr, &net.SRV{Priority: 1, Weight: 200})
arr = append(arr, &net.SRV{Priority: 1, Weight: 0xFFFF}) // max possible value to increase the ordering probability
arr = append(arr, &net.SRV{Priority: 1, Weight: 1})
arr = append(arr, &net.SRV{Priority: 1, Weight: 1})
arr = append(arr, &net.SRV{Priority: 1, Weight: 1})
arr = append(arr, &net.SRV{Priority: 1, Weight: 1})

retryCounter := 0
retry:
srvRecArray(arr).sortAndRand()
a.Equal(net.SRV{Priority: 1, Weight: 200}, *arr[0])
if (*arr[0] != net.SRV{Priority: 1, Weight: 0xFFFF}) {
// there is a small chance that a random number from 0 to max uint15 would be 0 or 1
// in this case the first element of the resulting sequence would be with weight of 1 and not the highest possible.
// if this happens, we will try again since it is expected time to time.
if retryCounter > 1 {
a.Fail("The first element of the resulting sequence should be with the highest possible weight at least in one of 3 attempts")
}
retryCounter++
goto retry
}
a.Equal(net.SRV{Priority: 1, Weight: 0xFFFF}, *arr[0])
a.Equal(net.SRV{Priority: 1, Weight: 1}, *arr[1])
a.Equal(net.SRV{Priority: 1, Weight: 1}, *arr[2])
a.Equal(net.SRV{Priority: 1, Weight: 1}, *arr[3])
Expand Down
Loading