Skip to content

Commit

Permalink
Merge pull request #522 from IntersectMBO/jeltsch/index-choice-with-f…
Browse files Browse the repository at this point in the history
…ixed-selection

Make it possible to choose the index type (fixed choice)
  • Loading branch information
jorisdral authored Feb 17, 2025
2 parents bd366da + 987bb0f commit ad33f2d
Show file tree
Hide file tree
Showing 34 changed files with 652 additions and 474 deletions.
12 changes: 7 additions & 5 deletions bench/macro/lsm-tree-bench-lookups.hs
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,8 @@ import Database.LSMTree.Extras.Orphans ()
import Database.LSMTree.Extras.UTxO
import Database.LSMTree.Internal.Entry (Entry (Insert),
NumEntries (..))
import Database.LSMTree.Internal.Index.Compact (IndexCompact)
import Database.LSMTree.Internal.Index (Index)
import qualified Database.LSMTree.Internal.Index as Index (IndexType (Compact))
import Database.LSMTree.Internal.Lookup
import Database.LSMTree.Internal.Paths (RunFsPaths (RunFsPaths))
import Database.LSMTree.Internal.Run (Run)
Expand Down Expand Up @@ -333,7 +334,7 @@ lookupsEnv ::
-> Run.RunDataCaching
-> IO ( V.Vector (Ref (Run IO FS.HandleIO))
, V.Vector (Bloom SerialisedKey)
, V.Vector IndexCompact
, V.Vector Index
, V.Vector (FS.Handle FS.HandleIO)
)
lookupsEnv runSizes keyRng0 hfs hbio caching = do
Expand All @@ -351,6 +352,7 @@ lookupsEnv runSizes keyRng0 hfs hbio caching = do
(RunFsPaths (FS.mkFsPath []) (RunNumber i))
(NumEntries numEntries)
(RunAllocFixed benchmarkNumBitsPerEntry)
Index.Compact
| ((numEntries, _), i) <- zip runSizes [0..] ]

-- fill the runs
Expand Down Expand Up @@ -428,7 +430,7 @@ benchBloomQueries !bs !keyRng !n
benchIndexSearches ::
ArenaManager RealWorld
-> V.Vector (Bloom SerialisedKey)
-> V.Vector IndexCompact
-> V.Vector Index
-> V.Vector (FS.Handle h)
-> StdGen
-> Int
Expand All @@ -446,7 +448,7 @@ benchIndexSearches !arenaManager !bs !ics !hs !keyRng !n
benchPrepLookups ::
ArenaManager RealWorld
-> V.Vector (Bloom SerialisedKey)
-> V.Vector IndexCompact
-> V.Vector Index
-> V.Vector (FS.Handle h)
-> StdGen
-> Int
Expand All @@ -468,7 +470,7 @@ benchLookupsIO ::
-> Ref (WBB.WriteBufferBlobs IO h)
-> V.Vector (Ref (Run IO h))
-> V.Vector (Bloom SerialisedKey)
-> V.Vector IndexCompact
-> V.Vector Index
-> V.Vector (FS.Handle h)
-> StdGen
-> Int
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,7 @@ constructIndexCompact ::
-> IndexCompact
constructIndexCompact (ChunkSize csize) apps = runST $ do
ica <- new csize
mapM_ (`append` ica) apps
mapM_ (`appendToCompact` ica) apps
(_, index) <- unsafeEnd ica
pure index

Expand Down
3 changes: 2 additions & 1 deletion bench/micro/Bench/Database/LSMTree/Internal/Lookup.hs
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ import Database.LSMTree.Extras.Random (frequency, randomByteStringR,
sampleUniformWithReplacement, uniformWithoutReplacement)
import Database.LSMTree.Extras.UTxO
import Database.LSMTree.Internal.Entry (Entry (..), NumEntries (..))
import qualified Database.LSMTree.Internal.Index as Index (IndexType (Compact))
import Database.LSMTree.Internal.Lookup (bloomQueries, indexSearches,
intraPageLookups, lookupsIO, prepLookups)
import Database.LSMTree.Internal.Page (getNumPages)
Expand Down Expand Up @@ -191,7 +192,7 @@ lookupsInBatchesEnv Config {..} = do
wbblobs <- WBB.new hasFS (FS.mkFsPath ["0.wbblobs"])
wb <- WB.fromMap <$> traverse (traverse (WBB.addBlob hasFS wbblobs)) storedKeys
let fsps = RunFsPaths (FS.mkFsPath []) (RunNumber 0)
r <- Run.fromWriteBuffer hasFS hasBlockIO caching (RunAllocFixed 10) fsps wb wbblobs
r <- Run.fromWriteBuffer hasFS hasBlockIO caching (RunAllocFixed 10) Index.Compact fsps wb wbblobs
let NumEntries nentriesReal = Run.size r
assertEqual nentriesReal nentries $ pure ()
-- 42 to 43 entries per page
Expand Down
5 changes: 3 additions & 2 deletions bench/micro/Bench/Database/LSMTree/Internal/Merge.hs
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ import qualified Database.LSMTree.Extras.Random as R
import Database.LSMTree.Extras.RunData
import Database.LSMTree.Extras.UTxO
import Database.LSMTree.Internal.Entry
import qualified Database.LSMTree.Internal.Index as Index (IndexType (Compact))
import Database.LSMTree.Internal.Merge (MergeType (..))
import qualified Database.LSMTree.Internal.Merge as Merge
import Database.LSMTree.Internal.Paths (RunFsPaths (..))
Expand Down Expand Up @@ -263,7 +264,7 @@ merge ::
merge fs hbio Config {..} targetPaths runs = do
let f = fromMaybe const mergeMappend
m <- fromMaybe (error "empty inputs, no merge created") <$>
Merge.new fs hbio Run.CacheRunData (RunAllocFixed 10)
Merge.new fs hbio Run.CacheRunData (RunAllocFixed 10) Index.Compact
mergeType f targetPaths runs
Merge.stepsToCompletion m stepSize

Expand Down Expand Up @@ -385,7 +386,7 @@ randomRuns ::
-> IO InputRuns
randomRuns hasFS hasBlockIO config@Config {..} rng0 =
V.fromList <$>
zipWithM (unsafeFlushAsWriteBuffer hasFS hasBlockIO)
zipWithM (unsafeFlushAsWriteBuffer hasFS hasBlockIO Index.Compact)
inputRunPaths runsData
where
runsData :: [SerialisedRunData]
Expand Down
58 changes: 52 additions & 6 deletions src-extras/Database/LSMTree/Extras/Index.hs
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@
module Database.LSMTree.Extras.Index
(
Append (AppendSinglePage, AppendMultiPage),
appendToCompact,
appendToOrdinary,
append
)
where
Expand All @@ -14,8 +16,15 @@ import Control.Monad.ST.Strict (ST)
import Data.Foldable (toList)
import Data.Word (Word32)
import Database.LSMTree.Internal.Chunk (Chunk)
import Database.LSMTree.Internal.Index (IndexAcc, appendMulti,
import Database.LSMTree.Internal.Index (IndexAcc)
import qualified Database.LSMTree.Internal.Index as Index (appendMulti,
appendSingle)
import Database.LSMTree.Internal.Index.CompactAcc (IndexCompactAcc)
import qualified Database.LSMTree.Internal.Index.CompactAcc as IndexCompact
(appendMulti, appendSingle)
import Database.LSMTree.Internal.Index.OrdinaryAcc (IndexOrdinaryAcc)
import qualified Database.LSMTree.Internal.Index.OrdinaryAcc as IndexOrdinary
(appendMulti, appendSingle)
import Database.LSMTree.Internal.Serialise (SerialisedKey)

-- | Instruction for appending pages, to be used in conjunction with indexes.
Expand All @@ -42,14 +51,51 @@ instance NFData Append where

{-|
Adds information about appended pages to an index and outputs newly
available chunks.
available chunks, using primitives specific to the type of the index.
See the documentation of the 'IndexAcc' class for constraints to adhere to.
See the documentation of the 'IndexAcc' type for constraints to adhere to.
-}
append :: IndexAcc j => Append -> j s -> ST s [Chunk]
append instruction indexAcc = case instruction of
appendWith :: ((SerialisedKey, SerialisedKey) -> j s -> ST s (Maybe Chunk))
-> ((SerialisedKey, Word32) -> j s -> ST s [Chunk])
-> Append
-> j s
-> ST s [Chunk]
appendWith appendSingle appendMulti instruction indexAcc = case instruction of
AppendSinglePage minKey maxKey
-> toList <$> appendSingle (minKey, maxKey) indexAcc
AppendMultiPage key overflowPageCount
-> appendMulti (key, overflowPageCount) indexAcc
{-# INLINABLE append #-}
{-# INLINABLE appendWith #-}

{-|
    Feeds one 'Append' instruction into the accumulator of a compact index and
    returns the chunks that have become available as a result.

    This is 'appendWith' instantiated with the single-page and multi-page
    primitives of the compact index accumulator.

    See the documentation of the 'IndexAcc' type for constraints to adhere to.
-}
appendToCompact :: Append -> IndexCompactAcc s -> ST s [Chunk]
appendToCompact = appendWith singlePage multiPage
  where
    -- Primitives specific to compact index accumulators.
    singlePage = IndexCompact.appendSingle
    multiPage  = IndexCompact.appendMulti
{-# INLINE appendToCompact #-}

{-|
    Feeds one 'Append' instruction into the accumulator of an ordinary index
    and returns the chunks that have become available as a result.

    This is 'appendWith' instantiated with the single-page and multi-page
    primitives of the ordinary index accumulator.

    See the documentation of the 'IndexAcc' type for constraints to adhere to.
-}
appendToOrdinary :: Append -> IndexOrdinaryAcc s -> ST s [Chunk]
appendToOrdinary = appendWith singlePage multiPage
  where
    -- Primitives specific to ordinary index accumulators.
    singlePage = IndexOrdinary.appendSingle
    multiPage  = IndexOrdinary.appendMulti
{-# INLINE appendToOrdinary #-}

{-|
    Feeds one 'Append' instruction into an index accumulator of the general
    'IndexAcc' type and returns the chunks that have become available as a
    result.

    This is 'appendWith' instantiated with the single-page and multi-page
    primitives that operate on 'IndexAcc'.

    See the documentation of the 'IndexAcc' type for constraints to adhere to.
-}
append :: Append -> IndexAcc s -> ST s [Chunk]
append = appendWith singlePage multiPage
  where
    -- Primitives working on the general index accumulator type.
    singlePage = Index.appendSingle
    multiPage  = Index.appendMulti
{-# INLINE append #-}
43 changes: 41 additions & 2 deletions src-extras/Database/LSMTree/Extras/NoThunks.hs
Original file line number Diff line number Diff line change
Expand Up @@ -38,11 +38,15 @@ import Database.LSMTree.Internal as Internal
import Database.LSMTree.Internal.BlobFile
import Database.LSMTree.Internal.BlobRef
import Database.LSMTree.Internal.ChecksumHandle
import Database.LSMTree.Internal.Chunk
import Database.LSMTree.Internal.Config
import Database.LSMTree.Internal.CRC32C
import Database.LSMTree.Internal.Entry
import Database.LSMTree.Internal.Index
import Database.LSMTree.Internal.Index.Compact
import Database.LSMTree.Internal.Index.CompactAcc
import Database.LSMTree.Internal.Index.Ordinary
import Database.LSMTree.Internal.Index.OrdinaryAcc
import Database.LSMTree.Internal.Merge
import qualified Database.LSMTree.Internal.Merge as Merge
import Database.LSMTree.Internal.MergeSchedule
Expand All @@ -64,6 +68,7 @@ import Database.LSMTree.Internal.RunReaders
import Database.LSMTree.Internal.Serialise
import Database.LSMTree.Internal.UniqCounter
import Database.LSMTree.Internal.Unsliced
import Database.LSMTree.Internal.Vector.Growing
import Database.LSMTree.Internal.WriteBuffer
import Database.LSMTree.Internal.WriteBufferBlobs
import GHC.Generics
Expand Down Expand Up @@ -275,7 +280,7 @@ deriving anyclass instance Typeable (PrimState m)
=> NoThunks (FilePointer m)

{-------------------------------------------------------------------------------
IndexCompact
Index
-------------------------------------------------------------------------------}

deriving stock instance Generic IndexCompact
Expand All @@ -284,6 +289,12 @@ deriving anyclass instance NoThunks IndexCompact
deriving stock instance Generic PageNo
deriving anyclass instance NoThunks PageNo

deriving stock instance Generic IndexOrdinary
deriving anyclass instance NoThunks IndexOrdinary

deriving stock instance Generic Index
deriving anyclass instance NoThunks Index

{-------------------------------------------------------------------------------
MergeSchedule
-------------------------------------------------------------------------------}
Expand Down Expand Up @@ -398,7 +409,7 @@ deriving anyclass instance Typeable s
=> NoThunks (RunAcc s)

{-------------------------------------------------------------------------------
IndexCompactAcc
IndexAcc
-------------------------------------------------------------------------------}

deriving stock instance Generic (IndexCompactAcc s)
Expand All @@ -408,6 +419,30 @@ deriving anyclass instance Typeable s
deriving stock instance Generic (SMaybe a)
deriving anyclass instance NoThunks a => NoThunks (SMaybe a)

deriving stock instance Generic (IndexOrdinaryAcc s)
deriving anyclass instance Typeable s
=> NoThunks (IndexOrdinaryAcc s)

deriving stock instance Generic (IndexAcc s)
deriving anyclass instance Typeable s
=> NoThunks (IndexAcc s)

{-------------------------------------------------------------------------------
GrowingVector
-------------------------------------------------------------------------------}

deriving stock instance Generic (GrowingVector s a)
deriving anyclass instance (Typeable s, Typeable a, NoThunks a)
=> NoThunks (GrowingVector s a)

{-------------------------------------------------------------------------------
Baler
-------------------------------------------------------------------------------}

deriving stock instance Generic (Baler s)
deriving anyclass instance Typeable s
=> NoThunks (Baler s)

{-------------------------------------------------------------------------------
PageAcc
-------------------------------------------------------------------------------}
Expand Down Expand Up @@ -681,6 +716,10 @@ deriving via OnlyCheckWhnf (VUM.MVector s Word64)
deriving via OnlyCheckWhnf (VUM.MVector s Bit)
instance Typeable s => NoThunks (VUM.MVector s Bit)

-- TODO: upstream to @nothunks@
--
-- 'NoThunks' instance for mutable primitive vectors of 'Word8', obtained via
-- 'OnlyCheckWhnf' in the same way as the instances for the unboxed mutable
-- vectors above.
-- NOTE(review): presumably this checks only that the vector value itself is
-- in WHNF and does not inspect its contents; confirm against the @nothunks@
-- documentation of 'OnlyCheckWhnf'.
deriving via OnlyCheckWhnf (VP.MVector s Word8)
instance Typeable s => NoThunks (VP.MVector s Word8)

{-------------------------------------------------------------------------------
ST
-------------------------------------------------------------------------------}
Expand Down
16 changes: 10 additions & 6 deletions src-extras/Database/LSMTree/Extras/RunData.hs
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@ import qualified Data.Vector as V
import Database.LSMTree.Extras (showPowersOf10)
import Database.LSMTree.Extras.Generators ()
import Database.LSMTree.Internal.Entry
import Database.LSMTree.Internal.Index (IndexType)
import Database.LSMTree.Internal.Lookup (ResolveSerialisedValue)
import Database.LSMTree.Internal.MergeSchedule (addWriteBufferEntries)
import Database.LSMTree.Internal.Paths
Expand Down Expand Up @@ -63,13 +64,14 @@ import Test.QuickCheck
withRun ::
HasFS IO h
-> HasBlockIO IO h
-> IndexType
-> RunFsPaths
-> SerialisedRunData
-> (Ref (Run IO h) -> IO a)
-> IO a
withRun hfs hbio path rd = do
withRun hfs hbio indexType path rd = do
bracket
(unsafeFlushAsWriteBuffer hfs hbio path $ serialiseRunData rd)
(unsafeFlushAsWriteBuffer hfs hbio indexType path $ serialiseRunData rd)
releaseRef

{-# INLINABLE withRuns #-}
Expand All @@ -78,12 +80,13 @@ withRuns ::
Traversable f
=> HasFS IO h
-> HasBlockIO IO h
-> IndexType
-> f (RunFsPaths, SerialisedRunData)
-> (f (Ref (Run IO h)) -> IO a)
-> IO a
withRuns hfs hbio xs = do
withRuns hfs hbio indexType xs = do
bracket
(forM xs $ \(path, rd) -> unsafeFlushAsWriteBuffer hfs hbio path rd)
(forM xs $ \(path, rd) -> unsafeFlushAsWriteBuffer hfs hbio indexType path rd)
(mapM_ releaseRef)

-- | Flush serialised run data to disk as if it were a write buffer.
Expand All @@ -95,14 +98,15 @@ withRuns hfs hbio xs = do
unsafeFlushAsWriteBuffer ::
HasFS IO h
-> HasBlockIO IO h
-> IndexType
-> RunFsPaths
-> SerialisedRunData
-> IO (Ref (Run IO h))
unsafeFlushAsWriteBuffer fs hbio fsPaths (RunData m) = do
unsafeFlushAsWriteBuffer fs hbio indexType fsPaths (RunData m) = do
let blobpath = addExtension (runBlobPath fsPaths) ".wb"
wbblobs <- WBB.new fs blobpath
wb <- WB.fromMap <$> traverse (traverse (WBB.addBlob fs wbblobs)) m
run <- Run.fromWriteBuffer fs hbio CacheRunData (RunAllocFixed 10)
run <- Run.fromWriteBuffer fs hbio CacheRunData (RunAllocFixed 10) indexType
fsPaths wb wbblobs
releaseRef wbblobs
return run
Expand Down
Loading

0 comments on commit ad33f2d

Please sign in to comment.