Skip to content

Commit 515ba88

Browse files
authored
GDD: trigger chain selection when caught-up (#1505)
Closes #1503. The first commit adds a regression test, which is then fixed by the second and third commits in this PR. Reverting either of those two commits makes the regression test fail as expected.
2 parents 8e3afe1 + 5c2bd47 commit 515ba88

File tree

8 files changed

+374
-47
lines changed

8 files changed

+374
-47
lines changed
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
<!--
2+
Testing changes only
3+
-->

ouroboros-consensus-diffusion/ouroboros-consensus-diffusion.cabal

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -238,6 +238,7 @@ test-suite consensus-test
238238
Test.Consensus.Genesis.Tests.CSJ
239239
Test.Consensus.Genesis.Tests.DensityDisconnect
240240
Test.Consensus.Genesis.Tests.LoE
241+
Test.Consensus.Genesis.Tests.LoE.CaughtUp
241242
Test.Consensus.Genesis.Tests.LoP
242243
Test.Consensus.Genesis.Tests.LongRangeAttack
243244
Test.Consensus.Genesis.Tests.Uniform

ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/Genesis/Tests/LoE.hs

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@ import Ouroboros.Network.Driver.Limits
1414
(ProtocolLimitFailure (ExceededTimeLimit))
1515
import Test.Consensus.BlockTree (BlockTree (..), BlockTreeBranch (..))
1616
import Test.Consensus.Genesis.Setup
17+
import qualified Test.Consensus.Genesis.Tests.LoE.CaughtUp as LoE.CaughtUp
1718
import Test.Consensus.PeerSimulator.Run (SchedulerConfig (..),
1819
defaultSchedulerConfig)
1920
import Test.Consensus.PeerSimulator.StateView
@@ -38,7 +39,8 @@ tests =
3839
adjustQuickCheckMaxSize (`div` 5) $
3940
testProperty "adversary does not hit timeouts" (prop_adversaryHitsTimeouts False),
4041
adjustQuickCheckMaxSize (`div` 5) $
41-
testProperty "adversary hits timeouts" (prop_adversaryHitsTimeouts True)
42+
testProperty "adversary hits timeouts" (prop_adversaryHitsTimeouts True),
43+
LoE.CaughtUp.tests
4244
]
4345

4446
-- | Tests that the selection advances in presence of the LoE when a peer is
Lines changed: 294 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,294 @@
1+
{-# LANGUAGE BlockArguments #-}
2+
{-# LANGUAGE DataKinds #-}
3+
{-# LANGUAGE DerivingStrategies #-}
4+
{-# LANGUAGE NamedFieldPuns #-}
5+
{-# LANGUAGE ScopedTypeVariables #-}
6+
{-# LANGUAGE TypeApplications #-}
7+
8+
-- | This is a regression test for
9+
-- <https://github.com/IntersectMBO/ouroboros-consensus/issues/1503>.
10+
--
11+
-- Concretely, consider @k = 1@ (security parameter), and a syncing,
12+
-- Genesis-enabled node.
13+
--
14+
-- Now consider the following block tree:
15+
--
16+
-- > G :> A :> C
17+
-- > :> B
18+
--
19+
-- Suppose that we have two peers, Peer 1 and Peer 2:
20+
--
21+
-- * Peer 1 first sends A, then C, then rolls back to A, and then idles.
22+
--
23+
-- * Peer 2 sends B and then idles.
24+
--
25+
-- In any possible interleaving (tested using IOSimPOR), the node should in the
26+
-- end be caught-up and have selected C as it is the best chain.
27+
--
28+
-- To (somewhat) simplify the test setup boilerplate, we do not actually run
29+
-- ChainSync and BlockFetch, but rather simulate their behavior by modifying the
30+
-- ChainSync client state (eg candidate fragments) as well as adding blocks to
31+
-- the ChainDB.
32+
module Test.Consensus.Genesis.Tests.LoE.CaughtUp (tests) where
33+
34+
import Cardano.Ledger.BaseTypes (knownNonZeroBounded)
35+
import Control.Monad (join)
36+
import Control.Monad.Class.MonadTest (MonadTest (..))
37+
import qualified Control.Monad.Class.MonadTimer.SI as SI
38+
import Control.Monad.IOSim (exploreSimTrace, traceResult)
39+
import Control.ResourceRegistry
40+
import Control.Tracer (nullTracer)
41+
import Data.Function (on)
42+
import Data.Functor (void)
43+
import Ouroboros.Consensus.Block
44+
import Ouroboros.Consensus.Config
45+
import Ouroboros.Consensus.Genesis.Governor (gddWatcher)
46+
import Ouroboros.Consensus.HeaderValidation (HeaderWithTime)
47+
import Ouroboros.Consensus.MiniProtocol.ChainSync.Client
48+
(ChainSyncClientHandle (..),
49+
ChainSyncClientHandleCollection (..), ChainSyncState (..),
50+
newChainSyncClientHandleCollection)
51+
import Ouroboros.Consensus.MiniProtocol.ChainSync.Client.State
52+
(ChainSyncJumpingState (..), DisengagedInitState (..))
53+
import Ouroboros.Consensus.Node.Genesis (setGetLoEFragment)
54+
import qualified Ouroboros.Consensus.Node.GSM as GSM
55+
import Ouroboros.Consensus.Node.GsmState
56+
import Ouroboros.Consensus.NodeId
57+
import qualified Ouroboros.Consensus.Storage.ChainDB as ChainDB
58+
import Ouroboros.Consensus.Storage.ChainDB.API (ChainDB)
59+
import qualified Ouroboros.Consensus.Storage.ChainDB.API.Types.InvalidBlockPunishment as Punishment
60+
import qualified Ouroboros.Consensus.Storage.ChainDB.Impl as ChainDB.Impl
61+
import qualified Ouroboros.Consensus.Storage.ChainDB.Impl.Args as ChainDB
62+
import Ouroboros.Consensus.Util.AnchoredFragment
63+
(preferAnchoredCandidate)
64+
import Ouroboros.Consensus.Util.IOLike
65+
import Ouroboros.Consensus.Util.STM (forkLinkedWatcher)
66+
import Ouroboros.Network.AnchoredFragment (AnchoredFragment)
67+
import qualified Ouroboros.Network.AnchoredFragment as AF
68+
import Test.QuickCheck
69+
import Test.Tasty
70+
import Test.Tasty.QuickCheck
71+
import Test.Util.ChainDB
72+
import Test.Util.Header
73+
import Test.Util.Orphans.IOLike ()
74+
import Test.Util.TestBlock
75+
76+
-- | The test tree of this module: a single property, 'prop_test'.
tests :: TestTree
tests =
  testProperty "Select best chain when CaughtUp" prop_test
78+
79+
-- | Runs 'run' under 'exploreSimTrace' with 'exploreRaces' switched on, and
-- turns every explored trace into a 'Property': the property returned by the
-- simulation when 'traceResult' yields a result, and a failing property that
-- shows the failure otherwise.
prop_test :: Property
prop_test =
  exploreSimTrace id (exploreRaces *> run) \_ simTrace ->
    either simFailed id $ traceResult False simTrace
  where
    -- The simulation itself did not produce a result.
    simFailed err = counterexample ("Failure: " <> show err) False
85+
86+
-- | The simulated scenario; see the module header for the block tree and for
-- what the two peers do.
--
-- Opens a ChainDB, starts the GSM and the GDD, and then forks one thread per
-- peer. Each peer thread registers a ChainSync client handle, edits that
-- handle's state and adds blocks to the ChainDB directly (no actual ChainSync
-- or BlockFetch is run). The returned 'Property' requires that the GSM state
-- is 'CaughtUp' and that the tip of the current selection is C.
run :: forall m. (IOLike m, SI.MonadTimer m) => m Property
run = withRegistry \registry -> do
    -- Setup: GSM state and LoE fragment, handed to 'setGetLoEFragment'.
    gsmVar <- newTVarIO PreSyncing
    loeFragVar <- newTVarIO (AF.Empty AF.AnchorGenesis)
    getLoEFragVar <-
      newTVarIO $ pure $ ChainDB.LoEEnabled $ AF.Empty AF.AnchorGenesis
    setGetLoEFragment (readTVar gsmVar) (readTVar loeFragVar) getLoEFragVar

    chainDB <- openChainDB registry (join $ readTVarIO getLoEFragVar)
    let feedBlock = ChainDB.addBlock_ chainDB Punishment.noPunishment

    csHandles <- atomically newChainSyncClientHandleCollection

    void $ forkLinkedThread registry "GSM" $
      GSM.enterPreSyncing $
        mkGsmEntryPoints csHandles chainDB (atomically . writeTVar gsmVar)

    forkGDD registry csHandles chainDB (readTVar gsmVar) loeFragVar

    -- Let the ChainDB background thread, the GSM and the GDD start running
    -- (any positive amount should do).
    threadDelay 1

    -- Simulate receiving A, B, C and C being rolled back. In the real system,
    -- this would happen via ChainSync and BlockFetch.

    void $ forkLinkedThread registry "Peer1" $ do
      -- Peer1 connects with candidate G :> A and is not idling yet.
      let fragA = singletonCandidate blkA
      peerHandle <- atomically $ mkTestChainSyncClientHandle fragA
      atomically $ cschcAddHandle csHandles peer1 peerHandle
      feedBlock blkA

      -- Next, the candidate is extended with C.
      atomically $ modifyTVar (cschState peerHandle) $ \st -> st {
          csCandidate  = csCandidate st AF.:> attachSlotTime cfg (getHeader blkC)
        , csLatestSlot = pure $ NotOrigin $ blockSlot blkC
        }
      feedBlock blkC

      -- Last, roll back to G :> A and start idling.
      atomically $ modifyTVar (cschState peerHandle) $ \_ -> ChainSyncState {
          csCandidate  = fragA
        , csLatestSlot = pure $ AF.headSlot fragA
        , csIdling     = True
        }

    void $ forkLinkedThread registry "Peer2" $ do
      -- Peer2 connects with candidate G :> B.
      peerHandle <- atomically $
        mkTestChainSyncClientHandle (singletonCandidate blkB)
      atomically $ cschcAddHandle csHandles peer2 peerHandle
      feedBlock blkB

      -- Afterwards it only starts idling.
      atomically $ modifyTVar (cschState peerHandle) $ \st ->
        st { csIdling = True }

    -- Leave time for the new blocks to be processed (any positive amount
    -- should do).
    threadDelay 1

    finalGsmState <- atomically $ readTVar gsmVar
    selectionTip <- atomically $ AF.headPoint <$> ChainDB.getCurrentChain chainDB
    pure $ conjoin
      [ finalGsmState === CaughtUp
      , counterexample "Selection tip is not C" $
          castPoint selectionTip === blockPoint blkC
      ]
  where
    peer1, peer2 :: CoreNodeId
    peer1 = CoreNodeId 1
    peer2 = CoreNodeId 2

    -- A and B are both built with 'firstBlock' (using different arguments);
    -- C is the successor of A.
    blkA, blkB, blkC :: TestBlock
    blkA = firstBlock 1
    blkB = firstBlock 2
    blkC = successorBlock blkA

    -- The candidate fragment consisting of just the header of the given block
    -- on top of genesis, with slot times attached.
    singletonCandidate ::
         TestBlock
      -> AnchoredFragment (HeaderWithTime TestBlock)
    singletonCandidate blk =
      attachSlotTimeToFragment cfg $
        AF.Empty AF.AnchorGenesis AF.:> getHeader blk
181+
182+
{-------------------------------------------------------------------------------
183+
Boilerplate for setting up the various test components
184+
-------------------------------------------------------------------------------}
185+
186+
-- | The top-level configuration shared by all components of this test, built
-- with 'singleNodeTestConfigWith'.
cfg :: TopLevelConfig TestBlock
cfg =
    singleNodeTestConfigWith
      TestBlockCodecConfig
      TestBlockStorageConfig
      securityParam
      genesisWindow
  where
    -- @k = 1@ keeps the test as simple as possible (otherwise, "saturating"
    -- the LoE requires more blocks).
    securityParam = SecurityParam (knownNonZeroBounded @1)

    -- A large Genesis window, to avoid disconnecting any peers.
    genesisWindow = GenesisWindow 20
196+
197+
-- | Build a ChainSync client handle whose state starts out with the given
-- candidate fragment, the head slot of that fragment as latest slot, and
-- 'csIdling' set to 'False'.
--
-- The jumping state is @'Disengaged' 'DisengagedDone'@ and there is no jump
-- info. The GSM-state callback and the GDD kill action do nothing, as no
-- actual ChainSync client is run in this test.
mkTestChainSyncClientHandle ::
     forall m. IOLike m
  => AnchoredFragment (HeaderWithTime TestBlock)
  -> STM m (ChainSyncClientHandle m TestBlock)
mkTestChainSyncClientHandle frag = do
    stateVar    <- newTVar initialState
    jumpingVar  <- newTVar (Disengaged DisengagedDone)
    jumpInfoVar <- newTVar Nothing
    pure ChainSyncClientHandle {
        cschState             = stateVar
      , cschJumping           = jumpingVar
      , cschJumpInfo          = jumpInfoVar
        -- Irrelevant for this test (as we don't actually run ChainSync).
      , cschOnGsmStateChanged = \_ _ -> pure ()
      , cschGDDKill           = pure ()
      }
  where
    initialState = ChainSyncState {
        csCandidate  = frag
      , csLatestSlot = pure (AF.headSlot frag)
      , csIdling     = False
      }
217+
218+
-- | Open a ChainDB for 'cfg' on empty node databases, with its LoE argument
-- ('ChainDB.cdbsLoE') set to the given action.
--
-- The ChainDB is allocated in the given registry (with 'ChainDB.closeDB' as
-- its release action), and its add-block runner is forked as a linked thread
-- in that same registry.
openChainDB ::
     forall m. IOLike m
  => ResourceRegistry m
  -> ChainDB.GetLoEFragment m TestBlock
  -> m (ChainDB m TestBlock)
openChainDB registry getLoEFragment = do
    nodeDBs <- emptyNodeDBs
    let minimalArgs = MinimalChainDbArgs {
            mcdbTopLevelConfig = cfg
          , mcdbChunkInfo      = mkTestChunkInfo cfg
          , mcdbInitLedger     = testInitExtLedger
          , mcdbRegistry       = registry
          , mcdbNodeDBs        = nodeDBs
          }
        chainDbArgs = withLoE (fromMinimalChainDbArgs minimalArgs)
    (_, (chainDB, internal)) <-
      allocate
        registry
        (const $ ChainDB.Impl.openDBInternal chainDbArgs False)
        (ChainDB.closeDB . fst)
    void $ forkLinkedThread registry "AddBlockRunner" $
      ChainDB.Impl.intAddBlockRunner internal
    pure chainDB
  where
    -- Override only the LoE field of the ChainDB-specific arguments.
    withLoE args = args {
        ChainDB.cdbsArgs = (ChainDB.cdbsArgs args) {
            ChainDB.cdbsLoE = getLoEFragment
          }
      }
244+
245+
-- | GSM entry points (via 'GSM.realGsmEntryPoints') for this test.
--
-- The ChainSync states are read from the given handle collection, the current
-- selection from the given ChainDB, and GSM state changes are reported through
-- the given callback.
mkGsmEntryPoints ::
     forall m. (IOLike m, SI.MonadTimer m)
  => ChainSyncClientHandleCollection CoreNodeId m TestBlock
  -> ChainDB m TestBlock
  -> (GsmState -> m ())
  -> GSM.GsmEntryPoints m
mkGsmEntryPoints handles chainDB setGsmState =
    GSM.realGsmEntryPoints (id, nullTracer) GSM.GsmView {
        GSM.candidateOverSelection    = compareToSelection
      , GSM.peerIsIdle                = csIdling
      , GSM.equivalent                = \l r -> AF.headPoint l == AF.headPoint r
      , GSM.getChainSyncStates        = fmap (fmap cschState) (cschcMap handles)
      , GSM.getCurrentSelection       = ChainDB.getCurrentChain chainDB
        -- Make sure that we stay in CaughtUp for the duration of the test once
        -- we have entered it.
      , GSM.minCaughtUpDuration       = 10 -- seconds
      , GSM.writeGsmState             = setGsmState
        -- Not interesting for this test.
      , GSM.antiThunderingHerd        = Nothing
      , GSM.setCaughtUpPersistentMark = const (pure ())
      , GSM.durationUntilTooOld       = Nothing
      , GSM.isHaaSatisfied            = pure True
      }
  where
    -- Compare a peer's candidate fragment against the current selection.
    compareToSelection selection peerState
      | Just _ <- AF.intersectionPoint selection candidate =
          -- 'preferAnchoredCandidate' has the intersection as precondition.
          GSM.WhetherCandidateIsBetter $
            preferAnchoredCandidate (configBlock cfg) selection candidate
      | otherwise =
          GSM.CandidateDoesNotIntersect
      where
        candidate = csCandidate peerState
277+
278+
-- | Fork the GDD ('gddWatcher') as a linked watcher named @"GDD"@ in the given
-- registry, without tracing and with a rate limit of zero.
--
-- The watcher is given the handle map of the collection, the ChainDB, the
-- action to read the GSM state, and the variable holding the LoE fragment.
forkGDD ::
     forall m. IOLike m
  => ResourceRegistry m
  -> ChainSyncClientHandleCollection CoreNodeId m TestBlock
  -> ChainDB m TestBlock
  -> STM m GsmState
  -> StrictTVar m (AnchoredFragment (HeaderWithTime TestBlock))
  -> m ()
forkGDD registry handles chainDB readGsmState loeFragVar =
    () <$ forkLinkedWatcher registry "GDD"
      (gddWatcher cfg nullTracer chainDB noRateLimit readGsmState
        (cschcMap handles) loeFragVar)
  where
    -- no rate limiting
    noRateLimit :: DiffTime
    noRateLimit = 0
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
### Patch
2+
3+
- Changed ChainSel to reprocess LoE-delayed blocks even when LoE is disabled.
Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
### Patch
2+
3+
- Changed GDD to trigger chain selection when caught-up. In certain edge cases,
4+
this enables the node to promptly select a better chain right after concluding
5+
that it is caught-up.

0 commit comments

Comments
 (0)