Skip to content

Commit

Permalink
Merge pull request #4757 from IntersectMBO/bolt12/p2p-bug-test
Browse files Browse the repository at this point in the history
Adds unit test to check for phantom connections
  • Loading branch information
bolt12 authored Dec 22, 2023
2 parents c3d5669 + dcaf2eb commit 8b7326c
Show file tree
Hide file tree
Showing 4 changed files with 167 additions and 0 deletions.
2 changes: 2 additions & 0 deletions ouroboros-network-testing/CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@

### Breaking changes

* Adds `eventually` and `eventsToListWithId` functions to Signal API

### Non-breaking changes

## 0.4.1.0 -- 2023-12-14
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ module Ouroboros.Network.Testing.Data.Signal
, eventsFromList
, eventsFromListUpToTime
, eventsToList
, eventsToListWithId
, selectEvents
-- * Low level access
, primitiveTransformEvents
Expand All @@ -33,6 +34,7 @@ module Ouroboros.Network.Testing.Data.Signal
, until
, difference
, scanl
, eventually
-- * Set-based temporal operations
, keyedTimeout
, keyedTimeoutTruncated
Expand Down Expand Up @@ -121,6 +123,9 @@ eventsFromListUpToTime horizon txs =
eventsToList :: Events a -> [(Time, a)]
eventsToList (Events txs) = [ (t, x) | E (TS t _i) x <- txs ]

eventsToListWithId :: Events a -> [E a]
eventsToListWithId (Events txs) = txs

selectEvents :: (a -> Maybe b) -> Events a -> Events b
selectEvents select (Events txs) =
Events [ E t y | E t x <- txs, y <- maybeToList (select x) ]
Expand Down Expand Up @@ -487,6 +492,16 @@ scanl f z (Signal x0 txs0) =
where
a' = f a x

-- | Starting on a given event does the predicate eventually holds.
--
-- If there's no events after the given time, return True
eventually :: TS -> (b -> Bool) -> Signal b -> Bool
eventually (TS time i) p (Signal x0 txs0)
| time <= Time 0 = p x0 || any (\(E _ b) -> p b) txs0
| otherwise = case dropWhile (\(E (TS time' i') _) -> time' <= time && i' < i) txs0 of
[] -> True
r -> any (\(E _ b) -> p b) r

--
-- QuickCheck
--
Expand Down
2 changes: 2 additions & 0 deletions ouroboros-network/CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@

### Non-breaking changes

* Adds 'unit_reconnect' testnet test

## 0.10.2.2 -- 2023-12-15

### Non-breaking changes
Expand Down
148 changes: 148 additions & 0 deletions ouroboros-network/sim-tests-lib/Test/Ouroboros/Network/Testnet.hs
Original file line number Diff line number Diff line change
Expand Up @@ -119,6 +119,8 @@ tests =
prop_diffusion_target_active_public
, testProperty "target established local"
prop_diffusion_target_established_local
, testProperty "unit reconnect"
prop_unit_reconnect
, testProperty "target active local"
prop_diffusion_target_active_local
, testProperty "target active root"
Expand Down Expand Up @@ -2614,6 +2616,140 @@ prop_unit_4258 =
)]
in prop_diffusion_cm_valid_transition_order bearerInfo diffScript

-- | This unit tests checks that for every * -> TerminatedSt Connection
-- Manager transition, there's a corresponding peer selection state update
-- where the peer gets removed from the established set.
--
-- Due to how IOSim currently works, the outbound governor thread is always
-- going to be scheduled first since it is always the first to block (on STM).
-- However this bug is triggered by a race condition between the
-- 'peerMonitoringLoop' and the outbound governor, where the
-- 'peerMonitoringLoop' will update the peer status way too fast and the
-- out-governor won't be able to notice the intermediate state (STM doesn't
-- guarantee all intermediate states are seen). If this happens the
-- out-governor will fail to remove the peer from the established peers set
-- and will think it has a connection to it when it does not.
--
-- If one wishes to check if the bug is present one should (unless IOSim is
-- patched to explore more schedules or IOSimPOR is made more efficient) add a
-- 'threadDelay' to 'evalGuardedDecisions' in the outbound governor code to
-- force it to go to the back of the queue everytime.
--
prop_unit_reconnect :: Property
prop_unit_reconnect =
let diffScript =
DiffusionScript
(SimArgs 1 10)
(singletonTimedScript Map.empty)
[(NodeArgs
(-3)
InitiatorAndResponderDiffusionMode
(Just 224)
Map.empty
(TestAddress (IPAddr (read "0.0.0.0") 0))
PeerSharingDisabled
[ (2,2,Map.fromList [ (RelayAccessAddress "0.0.0.1" 0,DoNotAdvertisePeer)
, (RelayAccessAddress "0.0.0.2" 0,DoNotAdvertisePeer)
])
]
(Script (LedgerPools [] :| []))
PeerSelectionTargets {
targetNumberOfRootPeers = 1,
targetNumberOfKnownPeers = 1,
targetNumberOfEstablishedPeers = 1,
targetNumberOfActivePeers = 1,

targetNumberOfKnownBigLedgerPeers = 0,
targetNumberOfEstablishedBigLedgerPeers = 0,
targetNumberOfActiveBigLedgerPeers = 0
}
(Script (DNSTimeout {getDNSTimeout = 10} :| []))
(Script (DNSLookupDelay {getDNSLookupDelay = 0} :| []))
Nothing
False
, [ JoinNetwork 0
])
, (NodeArgs
(-1)
InitiatorAndResponderDiffusionMode
(Just 2)
Map.empty
(TestAddress (IPAddr (read "0.0.0.1") 0))
PeerSharingDisabled
[(1,1,Map.fromList [(RelayAccessAddress "0.0.0.0" 0,DoNotAdvertisePeer)])]
(Script (LedgerPools [] :| []))
PeerSelectionTargets {
targetNumberOfRootPeers = 1,
targetNumberOfKnownPeers = 1,
targetNumberOfEstablishedPeers = 1,
targetNumberOfActivePeers = 1,

targetNumberOfKnownBigLedgerPeers = 0,
targetNumberOfEstablishedBigLedgerPeers = 0,
targetNumberOfActiveBigLedgerPeers = 0
}
(Script (DNSTimeout {getDNSTimeout = 10} :| [ ]))
(Script (DNSLookupDelay {getDNSLookupDelay = 0} :| []))
Nothing
False
, [ JoinNetwork 10
])
]

sim :: forall s . IOSim s Void
sim = diffusionSimulation (toBearerInfo (absNoAttenuation { abiInboundAttenuation = SpeedAttenuation SlowSpeed (Time 20) 1000
} ))
diffScript
iosimTracer

events :: [Events DiffusionTestTrace]
events = fmap ( Signal.eventsFromList
. fmap (\(WithName _ (WithTime t b)) -> (t, b))
)
. Trace.toList
. splitWithNameTrace
. Trace.fromList ()
. fmap snd
. Trace.toList
. fmap (\(WithTime t (WithName name b)) -> (t, WithName name (WithTime t b)))
. withTimeNameTraceEvents
@DiffusionTestTrace
@NtNAddr
. traceFromList
. fmap (\(t, tid, tl, te) -> SimEvent t tid tl te)
. take 125000
. traceEvents
$ runSimTrace sim

in conjoin
$ verify_consistency
<$> events

where
verify_consistency :: Events DiffusionTestTrace -> Property
verify_consistency events =
let govEstablishedPeersSig :: Signal (Set NtNAddr)
govEstablishedPeersSig =
selectDiffusionPeerSelectionState'
(EstablishedPeers.toSet . Governor.establishedPeers)
events

govConnectionManagerTransitionsSig :: [E (AbstractTransitionTrace NtNAddr)]
govConnectionManagerTransitionsSig =
Signal.eventsToListWithId
$ Signal.selectEvents
(\case
DiffusionConnectionManagerTransitionTrace tr -> Just tr
_ -> Nothing
) events

in conjoin
$ map (\(E ts a) -> case a of
TransitionTrace addr (Transition _ TerminatedSt) ->
eventually ts (Set.notMember addr) govEstablishedPeersSig
_ -> True -- TODO: Do the opposite
)
govConnectionManagerTransitionsSig

-- | Verify that certain traces are never emitted by the simulation.
--
Expand Down Expand Up @@ -3197,6 +3333,18 @@ selectDiffusionPeerSelectionState f =
DiffusionDebugPeerSelectionTrace (TraceGovernorState _ _ st) -> Just (f st)
_ -> Nothing)

selectDiffusionPeerSelectionState' :: Eq a
=> (forall peerconn. Governor.PeerSelectionState NtNAddr peerconn -> a)
-> Events DiffusionTestTrace
-> Signal a
selectDiffusionPeerSelectionState' f =
-- TODO: #3182 Rng seed should come from quickcheck.
Signal.fromChangeEvents (f $ Governor.emptyPeerSelectionState (mkStdGen 42) [])
. Signal.selectEvents
(\case
DiffusionDebugPeerSelectionTrace (TraceGovernorState _ _ st) -> Just (f st)
_ -> Nothing)

selectDiffusionConnectionManagerEvents
:: Trace () DiffusionTestTrace
-> Trace () (ConnectionManagerTrace NtNAddr
Expand Down

0 comments on commit 8b7326c

Please sign in to comment.