diff --git a/bench/macro/lsm-tree-bench-lookups.hs b/bench/macro/lsm-tree-bench-lookups.hs index 3f927a5a6..af6ee7589 100644 --- a/bench/macro/lsm-tree-bench-lookups.hs +++ b/bench/macro/lsm-tree-bench-lookups.hs @@ -25,7 +25,8 @@ import Database.LSMTree.Extras.Orphans () import Database.LSMTree.Extras.UTxO import Database.LSMTree.Internal.Entry (Entry (Insert), NumEntries (..)) -import Database.LSMTree.Internal.Index.Compact (IndexCompact) +import Database.LSMTree.Internal.Index (Index) +import qualified Database.LSMTree.Internal.Index as Index (IndexType (Compact)) import Database.LSMTree.Internal.Lookup import Database.LSMTree.Internal.Paths (RunFsPaths (RunFsPaths)) import Database.LSMTree.Internal.Run (Run) @@ -333,7 +334,7 @@ lookupsEnv :: -> Run.RunDataCaching -> IO ( V.Vector (Ref (Run IO FS.HandleIO)) , V.Vector (Bloom SerialisedKey) - , V.Vector IndexCompact + , V.Vector Index , V.Vector (FS.Handle FS.HandleIO) ) lookupsEnv runSizes keyRng0 hfs hbio caching = do @@ -351,6 +352,7 @@ lookupsEnv runSizes keyRng0 hfs hbio caching = do (RunFsPaths (FS.mkFsPath []) (RunNumber i)) (NumEntries numEntries) (RunAllocFixed benchmarkNumBitsPerEntry) + Index.Compact | ((numEntries, _), i) <- zip runSizes [0..] 
] -- fill the runs @@ -428,7 +430,7 @@ benchBloomQueries !bs !keyRng !n benchIndexSearches :: ArenaManager RealWorld -> V.Vector (Bloom SerialisedKey) - -> V.Vector IndexCompact + -> V.Vector Index -> V.Vector (FS.Handle h) -> StdGen -> Int @@ -446,7 +448,7 @@ benchIndexSearches !arenaManager !bs !ics !hs !keyRng !n benchPrepLookups :: ArenaManager RealWorld -> V.Vector (Bloom SerialisedKey) - -> V.Vector IndexCompact + -> V.Vector Index -> V.Vector (FS.Handle h) -> StdGen -> Int @@ -468,7 +470,7 @@ benchLookupsIO :: -> Ref (WBB.WriteBufferBlobs IO h) -> V.Vector (Ref (Run IO h)) -> V.Vector (Bloom SerialisedKey) - -> V.Vector IndexCompact + -> V.Vector Index -> V.Vector (FS.Handle h) -> StdGen -> Int diff --git a/bench/micro/Bench/Database/LSMTree/Internal/Index/Compact.hs b/bench/micro/Bench/Database/LSMTree/Internal/Index/Compact.hs index e18741746..0cf489580 100644 --- a/bench/micro/Bench/Database/LSMTree/Internal/Index/Compact.hs +++ b/bench/micro/Bench/Database/LSMTree/Internal/Index/Compact.hs @@ -84,7 +84,7 @@ constructIndexCompact :: -> IndexCompact constructIndexCompact (ChunkSize csize) apps = runST $ do ica <- new csize - mapM_ (`append` ica) apps + mapM_ (`appendToCompact` ica) apps (_, index) <- unsafeEnd ica pure index diff --git a/bench/micro/Bench/Database/LSMTree/Internal/Lookup.hs b/bench/micro/Bench/Database/LSMTree/Internal/Lookup.hs index 5efcb41e9..bd88c2476 100644 --- a/bench/micro/Bench/Database/LSMTree/Internal/Lookup.hs +++ b/bench/micro/Bench/Database/LSMTree/Internal/Lookup.hs @@ -21,6 +21,7 @@ import Database.LSMTree.Extras.Random (frequency, randomByteStringR, sampleUniformWithReplacement, uniformWithoutReplacement) import Database.LSMTree.Extras.UTxO import Database.LSMTree.Internal.Entry (Entry (..), NumEntries (..)) +import qualified Database.LSMTree.Internal.Index as Index (IndexType (Compact)) import Database.LSMTree.Internal.Lookup (bloomQueries, indexSearches, intraPageLookups, lookupsIO, prepLookups) import 
Database.LSMTree.Internal.Page (getNumPages) @@ -191,7 +192,7 @@ lookupsInBatchesEnv Config {..} = do wbblobs <- WBB.new hasFS (FS.mkFsPath ["0.wbblobs"]) wb <- WB.fromMap <$> traverse (traverse (WBB.addBlob hasFS wbblobs)) storedKeys let fsps = RunFsPaths (FS.mkFsPath []) (RunNumber 0) - r <- Run.fromWriteBuffer hasFS hasBlockIO caching (RunAllocFixed 10) fsps wb wbblobs + r <- Run.fromWriteBuffer hasFS hasBlockIO caching (RunAllocFixed 10) Index.Compact fsps wb wbblobs let NumEntries nentriesReal = Run.size r assertEqual nentriesReal nentries $ pure () -- 42 to 43 entries per page diff --git a/bench/micro/Bench/Database/LSMTree/Internal/Merge.hs b/bench/micro/Bench/Database/LSMTree/Internal/Merge.hs index b64d6c0ef..7878010a1 100644 --- a/bench/micro/Bench/Database/LSMTree/Internal/Merge.hs +++ b/bench/micro/Bench/Database/LSMTree/Internal/Merge.hs @@ -18,6 +18,7 @@ import qualified Database.LSMTree.Extras.Random as R import Database.LSMTree.Extras.RunData import Database.LSMTree.Extras.UTxO import Database.LSMTree.Internal.Entry +import qualified Database.LSMTree.Internal.Index as Index (IndexType (Compact)) import Database.LSMTree.Internal.Merge (MergeType (..)) import qualified Database.LSMTree.Internal.Merge as Merge import Database.LSMTree.Internal.Paths (RunFsPaths (..)) @@ -263,7 +264,7 @@ merge :: merge fs hbio Config {..} targetPaths runs = do let f = fromMaybe const mergeMappend m <- fromMaybe (error "empty inputs, no merge created") <$> - Merge.new fs hbio Run.CacheRunData (RunAllocFixed 10) + Merge.new fs hbio Run.CacheRunData (RunAllocFixed 10) Index.Compact mergeType f targetPaths runs Merge.stepsToCompletion m stepSize @@ -385,7 +386,7 @@ randomRuns :: -> IO InputRuns randomRuns hasFS hasBlockIO config@Config {..} rng0 = V.fromList <$> - zipWithM (unsafeFlushAsWriteBuffer hasFS hasBlockIO) + zipWithM (unsafeFlushAsWriteBuffer hasFS hasBlockIO Index.Compact) inputRunPaths runsData where runsData :: [SerialisedRunData] diff --git 
a/src-extras/Database/LSMTree/Extras/Index.hs b/src-extras/Database/LSMTree/Extras/Index.hs index 56bd343e1..89b8907b3 100644 --- a/src-extras/Database/LSMTree/Extras/Index.hs +++ b/src-extras/Database/LSMTree/Extras/Index.hs @@ -5,6 +5,8 @@ module Database.LSMTree.Extras.Index ( Append (AppendSinglePage, AppendMultiPage), + appendToCompact, + appendToOrdinary, append ) where @@ -14,8 +16,15 @@ import Control.Monad.ST.Strict (ST) import Data.Foldable (toList) import Data.Word (Word32) import Database.LSMTree.Internal.Chunk (Chunk) -import Database.LSMTree.Internal.Index (IndexAcc, appendMulti, +import Database.LSMTree.Internal.Index (IndexAcc) +import qualified Database.LSMTree.Internal.Index as Index (appendMulti, appendSingle) +import Database.LSMTree.Internal.Index.CompactAcc (IndexCompactAcc) +import qualified Database.LSMTree.Internal.Index.CompactAcc as IndexCompact + (appendMulti, appendSingle) +import Database.LSMTree.Internal.Index.OrdinaryAcc (IndexOrdinaryAcc) +import qualified Database.LSMTree.Internal.Index.OrdinaryAcc as IndexOrdinary + (appendMulti, appendSingle) import Database.LSMTree.Internal.Serialise (SerialisedKey) -- | Instruction for appending pages, to be used in conjunction with indexes. @@ -42,14 +51,51 @@ instance NFData Append where {-| Adds information about appended pages to an index and outputs newly - available chunks. + available chunks, using primitives specific to the type of the index. - See the documentation of the 'IndexAcc' class for constraints to adhere to. + See the documentation of the 'IndexAcc' type for constraints to adhere to. 
-} -append :: IndexAcc j => Append -> j s -> ST s [Chunk] -append instruction indexAcc = case instruction of +appendWith :: ((SerialisedKey, SerialisedKey) -> j s -> ST s (Maybe Chunk)) + -> ((SerialisedKey, Word32) -> j s -> ST s [Chunk]) + -> Append + -> j s + -> ST s [Chunk] +appendWith appendSingle appendMulti instruction indexAcc = case instruction of AppendSinglePage minKey maxKey -> toList <$> appendSingle (minKey, maxKey) indexAcc AppendMultiPage key overflowPageCount -> appendMulti (key, overflowPageCount) indexAcc -{-# INLINABLE append #-} +{-# INLINABLE appendWith #-} + +{-| + Adds information about appended pages to a compact index and outputs newly + available chunks. + + See the documentation of the 'IndexAcc' type for constraints to adhere to. +-} +appendToCompact :: Append -> IndexCompactAcc s -> ST s [Chunk] +appendToCompact = appendWith IndexCompact.appendSingle + IndexCompact.appendMulti +{-# INLINE appendToCompact #-} + +{-| + Adds information about appended pages to an ordinary index and outputs newly + available chunks. + + See the documentation of the 'IndexAcc' type for constraints to adhere to. +-} +appendToOrdinary :: Append -> IndexOrdinaryAcc s -> ST s [Chunk] +appendToOrdinary = appendWith IndexOrdinary.appendSingle + IndexOrdinary.appendMulti +{-# INLINE appendToOrdinary #-} + +{-| + Adds information about appended pages to an index and outputs newly + available chunks. + + See the documentation of the 'IndexAcc' type for constraints to adhere to. 
+-} +append :: Append -> IndexAcc s -> ST s [Chunk] +append = appendWith Index.appendSingle + Index.appendMulti +{-# INLINE append #-} diff --git a/src-extras/Database/LSMTree/Extras/NoThunks.hs b/src-extras/Database/LSMTree/Extras/NoThunks.hs index 578d6c6de..f15c4a3ec 100644 --- a/src-extras/Database/LSMTree/Extras/NoThunks.hs +++ b/src-extras/Database/LSMTree/Extras/NoThunks.hs @@ -38,11 +38,15 @@ import Database.LSMTree.Internal as Internal import Database.LSMTree.Internal.BlobFile import Database.LSMTree.Internal.BlobRef import Database.LSMTree.Internal.ChecksumHandle +import Database.LSMTree.Internal.Chunk import Database.LSMTree.Internal.Config import Database.LSMTree.Internal.CRC32C import Database.LSMTree.Internal.Entry +import Database.LSMTree.Internal.Index import Database.LSMTree.Internal.Index.Compact import Database.LSMTree.Internal.Index.CompactAcc +import Database.LSMTree.Internal.Index.Ordinary +import Database.LSMTree.Internal.Index.OrdinaryAcc import Database.LSMTree.Internal.Merge import qualified Database.LSMTree.Internal.Merge as Merge import Database.LSMTree.Internal.MergeSchedule @@ -64,6 +68,7 @@ import Database.LSMTree.Internal.RunReaders import Database.LSMTree.Internal.Serialise import Database.LSMTree.Internal.UniqCounter import Database.LSMTree.Internal.Unsliced +import Database.LSMTree.Internal.Vector.Growing import Database.LSMTree.Internal.WriteBuffer import Database.LSMTree.Internal.WriteBufferBlobs import GHC.Generics @@ -275,7 +280,7 @@ deriving anyclass instance Typeable (PrimState m) => NoThunks (FilePointer m) {------------------------------------------------------------------------------- - IndexCompact + Index -------------------------------------------------------------------------------} deriving stock instance Generic IndexCompact @@ -284,6 +289,12 @@ deriving anyclass instance NoThunks IndexCompact deriving stock instance Generic PageNo deriving anyclass instance NoThunks PageNo +deriving stock instance Generic 
IndexOrdinary +deriving anyclass instance NoThunks IndexOrdinary + +deriving stock instance Generic Index +deriving anyclass instance NoThunks Index + {------------------------------------------------------------------------------- MergeSchedule -------------------------------------------------------------------------------} @@ -398,7 +409,7 @@ deriving anyclass instance Typeable s => NoThunks (RunAcc s) {------------------------------------------------------------------------------- - IndexCompactAcc + IndexAcc -------------------------------------------------------------------------------} deriving stock instance Generic (IndexCompactAcc s) @@ -408,6 +419,30 @@ deriving anyclass instance Typeable s deriving stock instance Generic (SMaybe a) deriving anyclass instance NoThunks a => NoThunks (SMaybe a) +deriving stock instance Generic (IndexOrdinaryAcc s) +deriving anyclass instance Typeable s + => NoThunks (IndexOrdinaryAcc s) + +deriving stock instance Generic (IndexAcc s) +deriving anyclass instance Typeable s + => NoThunks (IndexAcc s) + +{------------------------------------------------------------------------------- + GrowingVector +-------------------------------------------------------------------------------} + +deriving stock instance Generic (GrowingVector s a) +deriving anyclass instance (Typeable s, Typeable a, NoThunks a) + => NoThunks (GrowingVector s a) + +{------------------------------------------------------------------------------- + Baler +-------------------------------------------------------------------------------} + +deriving stock instance Generic (Baler s) +deriving anyclass instance Typeable s + => NoThunks (Baler s) + {------------------------------------------------------------------------------- PageAcc -------------------------------------------------------------------------------} @@ -681,6 +716,10 @@ deriving via OnlyCheckWhnf (VUM.MVector s Word64) deriving via OnlyCheckWhnf (VUM.MVector s Bit) instance Typeable s => NoThunks 
(VUM.MVector s Bit) +-- TODO: upstream to @nothunks@ +deriving via OnlyCheckWhnf (VP.MVector s Word8) + instance Typeable s => NoThunks (VP.MVector s Word8) + {------------------------------------------------------------------------------- ST -------------------------------------------------------------------------------} diff --git a/src-extras/Database/LSMTree/Extras/RunData.hs b/src-extras/Database/LSMTree/Extras/RunData.hs index f16eb80a4..348a74990 100644 --- a/src-extras/Database/LSMTree/Extras/RunData.hs +++ b/src-extras/Database/LSMTree/Extras/RunData.hs @@ -36,6 +36,7 @@ import qualified Data.Vector as V import Database.LSMTree.Extras (showPowersOf10) import Database.LSMTree.Extras.Generators () import Database.LSMTree.Internal.Entry +import Database.LSMTree.Internal.Index (IndexType) import Database.LSMTree.Internal.Lookup (ResolveSerialisedValue) import Database.LSMTree.Internal.MergeSchedule (addWriteBufferEntries) import Database.LSMTree.Internal.Paths @@ -63,13 +64,14 @@ import Test.QuickCheck withRun :: HasFS IO h -> HasBlockIO IO h + -> IndexType -> RunFsPaths -> SerialisedRunData -> (Ref (Run IO h) -> IO a) -> IO a -withRun hfs hbio path rd = do +withRun hfs hbio indexType path rd = do bracket - (unsafeFlushAsWriteBuffer hfs hbio path $ serialiseRunData rd) + (unsafeFlushAsWriteBuffer hfs hbio indexType path $ serialiseRunData rd) releaseRef {-# INLINABLE withRuns #-} @@ -78,12 +80,13 @@ withRuns :: Traversable f => HasFS IO h -> HasBlockIO IO h + -> IndexType -> f (RunFsPaths, SerialisedRunData) -> (f (Ref (Run IO h)) -> IO a) -> IO a -withRuns hfs hbio xs = do +withRuns hfs hbio indexType xs = do bracket - (forM xs $ \(path, rd) -> unsafeFlushAsWriteBuffer hfs hbio path rd) + (forM xs $ \(path, rd) -> unsafeFlushAsWriteBuffer hfs hbio indexType path rd) (mapM_ releaseRef) -- | Flush serialised run data to disk as if it were a write buffer. 
@@ -95,14 +98,15 @@ withRuns hfs hbio xs = do unsafeFlushAsWriteBuffer :: HasFS IO h -> HasBlockIO IO h + -> IndexType -> RunFsPaths -> SerialisedRunData -> IO (Ref (Run IO h)) -unsafeFlushAsWriteBuffer fs hbio fsPaths (RunData m) = do +unsafeFlushAsWriteBuffer fs hbio indexType fsPaths (RunData m) = do let blobpath = addExtension (runBlobPath fsPaths) ".wb" wbblobs <- WBB.new fs blobpath wb <- WB.fromMap <$> traverse (traverse (WBB.addBlob fs wbblobs)) m - run <- Run.fromWriteBuffer fs hbio CacheRunData (RunAllocFixed 10) + run <- Run.fromWriteBuffer fs hbio CacheRunData (RunAllocFixed 10) indexType fsPaths wb wbblobs releaseRef wbblobs return run diff --git a/src/Database/LSMTree/Internal/ChecksumHandle.hs b/src/Database/LSMTree/Internal/ChecksumHandle.hs index 104d5e1a3..39462c706 100644 --- a/src/Database/LSMTree/Internal/ChecksumHandle.hs +++ b/src/Database/LSMTree/Internal/ChecksumHandle.hs @@ -1,5 +1,3 @@ -{-# LANGUAGE MagicHash #-} - module Database.LSMTree.Internal.ChecksumHandle ( -- * Checksum handles @@ -36,8 +34,8 @@ import qualified Database.LSMTree.Internal.Chunk as Chunk (toByteString) import Database.LSMTree.Internal.CRC32C (CRC32C) import qualified Database.LSMTree.Internal.CRC32C as CRC import Database.LSMTree.Internal.Entry -import Database.LSMTree.Internal.Index.Compact (IndexCompact) -import qualified Database.LSMTree.Internal.Index.Compact as Index +import Database.LSMTree.Internal.Index (Index, IndexType) +import qualified Database.LSMTree.Internal.Index as Index (finalLBS, headerLBS) import Database.LSMTree.Internal.Paths (ForBlob (..), ForFilter (..), ForIndex (..), ForKOps (..)) import qualified Database.LSMTree.Internal.RawBytes as RB @@ -46,7 +44,6 @@ import qualified Database.LSMTree.Internal.RawOverflowPage as RawOverflowPage import Database.LSMTree.Internal.RawPage (RawPage) import qualified Database.LSMTree.Internal.RawPage as RawPage import Database.LSMTree.Internal.Serialise -import GHC.Exts (Proxy#) import qualified System.FS.API 
as FS import System.FS.API import qualified System.FS.BlockIO.API as FS @@ -207,17 +204,17 @@ writeFilter hfs filterHandle bf = {-# SPECIALISE writeIndexHeader :: HasFS IO h -> ForIndex (ChecksumHandle RealWorld h) - -> Proxy# IndexCompact + -> IndexType -> IO () #-} writeIndexHeader :: (MonadSTM m, PrimMonad m) => HasFS m h -> ForIndex (ChecksumHandle (PrimState m) h) - -> Proxy# IndexCompact + -> IndexType -> m () -writeIndexHeader hfs indexHandle indexTypeProxy = +writeIndexHeader hfs indexHandle indexType = writeToHandle hfs (unForIndex indexHandle) $ - Index.headerLBS indexTypeProxy + Index.headerLBS indexType {-# SPECIALISE writeIndexChunk :: HasFS IO h @@ -238,14 +235,14 @@ writeIndexChunk hfs indexHandle chunk = HasFS IO h -> ForIndex (ChecksumHandle RealWorld h) -> NumEntries - -> IndexCompact + -> Index -> IO () #-} writeIndexFinal :: (MonadSTM m, PrimMonad m) => HasFS m h -> ForIndex (ChecksumHandle (PrimState m) h) -> NumEntries - -> IndexCompact + -> Index -> m () writeIndexFinal hfs indexHandle numEntries index = writeToHandle hfs (unForIndex indexHandle) $ diff --git a/src/Database/LSMTree/Internal/Chunk.hs b/src/Database/LSMTree/Internal/Chunk.hs index aba3275b3..a43ce09d6 100644 --- a/src/Database/LSMTree/Internal/Chunk.hs +++ b/src/Database/LSMTree/Internal/Chunk.hs @@ -9,7 +9,7 @@ module Database.LSMTree.Internal.Chunk toByteString, -- * Balers - Baler, + Baler (Baler), createBaler, feedBaler, unsafeEndBaler diff --git a/src/Database/LSMTree/Internal/Config.hs b/src/Database/LSMTree/Internal/Config.hs index ff0e7f771..c0a8f8306 100644 --- a/src/Database/LSMTree/Internal/Config.hs +++ b/src/Database/LSMTree/Internal/Config.hs @@ -21,6 +21,7 @@ module Database.LSMTree.Internal.Config ( , bloomFilterAllocForLevel -- * Fence pointer index , FencePointerIndex (..) + , indexTypeForRun -- * Disk cache policy , DiskCachePolicy (..) 
, diskCachePolicyForLevel @@ -36,6 +37,9 @@ import Data.Word (Word64) import Database.LSMTree.Internal.Assertions (assert, fromIntegralChecked) import Database.LSMTree.Internal.Entry (NumEntries (..)) +import Database.LSMTree.Internal.Index (IndexType) +import qualified Database.LSMTree.Internal.Index as Index + (IndexType (Compact, Ordinary)) import Database.LSMTree.Internal.Run (RunDataCaching (..)) import Database.LSMTree.Internal.RunAcc (RunBloomFilterAlloc (..)) import qualified Monkey @@ -281,27 +285,26 @@ bloomFilterAllocForLevel conf (LevelNo l) = -------------------------------------------------------------------------------} -- | Configure the type of fence pointer index. --- --- TODO: this configuration option currently has no effect: 'CompactIndex' is --- always used. data FencePointerIndex = -- | Use a compact fence pointer index. -- - -- The compact index type is designed to work with keys that are large - -- cryptographic hashes, e.g. 32 bytes. + -- Compact indexes are designed to work with keys that are large (for + -- example, 32 bytes long) cryptographic hashes. -- - -- When using the 'IndexCompact', additional constraints apply to the - -- 'Database.LSMTree.Internal.Serialise.Class.serialiseKey' function. The - -- __Minimal size__ law should be satisfied: + -- When using a compact index, it is vital that the + -- 'Database.LSMTree.Internal.Serialise.Class.serialiseKey' function + -- satisfies the following law: -- -- [Minimal size] @'Database.LSMTree.Internal.RawBytes.size' - -- ('Database.LSMTree.Internal.Serialise.Class.serialiseKey' x) >= 8@ + -- ('Database.LSMTree.Internal.Serialise.Class.serialiseKey' x) >= 8@ -- -- Use 'Database.LSMTree.Internal.Serialise.Class.serialiseKeyMinimalSize' -- to test this law. CompactIndex - -- | Use an ordinary fence pointer index, without any constraints on - -- serialised keys. 
+ -- | Use an ordinary fence pointer index. + -- + -- Ordinary indexes do not have any constraints on keys other than that + -- their serialised forms must be smaller than 64 KiB. | OrdinaryIndex deriving stock (Show, Eq) @@ -309,6 +312,10 @@ instance NFData FencePointerIndex where rnf CompactIndex = () rnf OrdinaryIndex = () +indexTypeForRun :: FencePointerIndex -> IndexType +indexTypeForRun CompactIndex = Index.Compact +indexTypeForRun OrdinaryIndex = Index.Ordinary + {------------------------------------------------------------------------------- Disk cache policy -------------------------------------------------------------------------------} diff --git a/src/Database/LSMTree/Internal/Index.hs b/src/Database/LSMTree/Internal/Index.hs index a22a98161..c83331a0a 100644 --- a/src/Database/LSMTree/Internal/Index.hs +++ b/src/Database/LSMTree/Internal/Index.hs @@ -1,90 +1,141 @@ -{-# LANGUAGE MagicHash #-} -{-# LANGUAGE TypeFamilies #-} - {-| - Provides a common interface to different types of fence pointer indexes and - their accumulators. + Provides support for working with fence pointer indexes of different types + and their accumulators. + + Part of the functionality that this module provides is the construction of + serialised indexes in a mostly incremental fashion. The incremental part of + serialisation is provided through index accumulators, while the + non-incremental bits are provided through the index operations 'headerLBS' + and 'finalLBS'. To completely serialise an index interleaved with its + construction, proceed as follows: + + 1. Use 'headerLBS' to generate the header of the serialised index. + + 2. Incrementally construct the index using the operations of 'IndexAcc', + and assemble the body of the serialised index from the generated chunks. + + 3. Use 'finalLBS' to generate the footer of the serialised index. 
-} module Database.LSMTree.Internal.Index ( - Index (search, sizeInPages, headerLBS, finalLBS, fromSBS), - IndexAcc (ResultingIndex, appendSingle, appendMulti, unsafeEnd) + -- * Index types + IndexType (Compact, Ordinary), + + -- * Indexes + Index (CompactIndex, OrdinaryIndex), + search, + sizeInPages, + headerLBS, + finalLBS, + fromSBS, + + -- * Index accumulators + IndexAcc (CompactIndexAcc, OrdinaryIndexAcc), + newWithDefaults, + appendSingle, + appendMulti, + unsafeEnd ) where +import Control.Arrow (second) +import Control.DeepSeq (NFData (rnf)) import Control.Monad.ST.Strict (ST) import Data.ByteString.Lazy (LazyByteString) import Data.ByteString.Short (ShortByteString) import Data.Word (Word32) import Database.LSMTree.Internal.Chunk (Chunk) import Database.LSMTree.Internal.Entry (NumEntries) +import Database.LSMTree.Internal.Index.Compact (IndexCompact) +import qualified Database.LSMTree.Internal.Index.Compact as Compact (finalLBS, + fromSBS, headerLBS, search, sizeInPages) +import Database.LSMTree.Internal.Index.CompactAcc (IndexCompactAcc) +import qualified Database.LSMTree.Internal.Index.CompactAcc as Compact + (appendMulti, appendSingle, newWithDefaults, unsafeEnd) +import Database.LSMTree.Internal.Index.Ordinary (IndexOrdinary) +import qualified Database.LSMTree.Internal.Index.Ordinary as Ordinary (finalLBS, + fromSBS, headerLBS, search, sizeInPages) +import Database.LSMTree.Internal.Index.OrdinaryAcc (IndexOrdinaryAcc) +import qualified Database.LSMTree.Internal.Index.OrdinaryAcc as Ordinary + (appendMulti, appendSingle, newWithDefaults, unsafeEnd) import Database.LSMTree.Internal.Page (NumPages, PageSpan) import Database.LSMTree.Internal.Serialise (SerialisedKey) -import GHC.Exts (Proxy#) + +-- * Index types + +-- | The type of supported index types. +data IndexType = Compact | Ordinary + +-- * Indexes + +-- | The type of supported indexes. 
+data Index + = CompactIndex !IndexCompact + | OrdinaryIndex !IndexOrdinary + deriving stock (Eq, Show) + +instance NFData Index where + + rnf (CompactIndex index) = rnf index + rnf (OrdinaryIndex index) = rnf index {-| - The class of index types. + Searches for a page span that contains a key–value pair with the given key. + If there is indeed such a pair, the result is the corresponding page span; + if there is no such pair, the result is an arbitrary but valid page span. +-} +search :: SerialisedKey -> Index -> PageSpan +search key (CompactIndex index) = Compact.search key index +search key (OrdinaryIndex index) = Ordinary.search key index - This class contains also methods for the non-incremental parts of otherwise - incremental serialisation. To completely serialise an index interleaved with - its construction, proceed as follows: +-- | Yields the number of pages covered by an index. +sizeInPages :: Index -> NumPages +sizeInPages (CompactIndex index) = Compact.sizeInPages index +sizeInPages (OrdinaryIndex index) = Ordinary.sizeInPages index - 1. Use 'headerLBS' to generate the header of the serialised index. +{-| + Yields the header of the serialised form of an index. - 2. Incrementally construct the index using the methods of 'IndexAcc', and - assemble the body of the serialised index from the generated chunks. + See [the module documentation]("Database.LSMTree.Internal.Index") for how to + generate a complete serialised index. +-} +headerLBS :: IndexType -> LazyByteString +headerLBS Compact = Compact.headerLBS +headerLBS Ordinary = Ordinary.headerLBS - 3. Use 'finalLBS' to generate the footer of the serialised index. +{-| + Yields the footer of the serialised form of an index. + + See [the module documentation]("Database.LSMTree.Internal.Index") for how to + generate a complete serialised index. -} -class Index i where - - {-| - Searches for a page span that contains a key–value pair with the given - key. 
If there is indeed such a pair, the result is the corresponding - page span; if there is no such pair, the result is an arbitrary but - valid page span. - -} - search :: SerialisedKey -> i -> PageSpan - - -- | Yields the number of pages covered by an index. - sizeInPages :: i -> NumPages - - {-| - Yields the header of the serialised form of an index. - - See the documentation of the 'Index' class for how to generate a - complete serialised index. - -} - headerLBS :: Proxy# i -> LazyByteString - - {-| - Yields the footer of the serialised form of an index. - - See the documentation of the 'Index' class for how to generate a - complete serialised index. - -} - finalLBS :: NumEntries -> i -> LazyByteString - {-| - Reads an index along with the number of entries of the respective run - from a byte string. - - The byte string must contain the serialised index exactly, with no - leading or trailing space. Furthermore, its contents must be stored - 64-bit-aligned. - - The contents of the byte string may be directly used as the backing - memory for the constructed index. Currently, this is done for compact - indexes. - - For deserialising numbers, the endianness of the host system is used. If - serialisation has been done with a different endianness, this mismatch - is detected by looking at the type–version indicator. - -} - fromSBS :: ShortByteString -> Either String (NumEntries, i) +finalLBS :: NumEntries -> Index -> LazyByteString +finalLBS entryCount (CompactIndex index) = Compact.finalLBS entryCount index +finalLBS entryCount (OrdinaryIndex index) = Ordinary.finalLBS entryCount index {-| - The class of index accumulator types, where an index accumulator denotes an - index under incremental construction. + Reads an index along with the number of entries of the respective run from a + byte string. + + The byte string must contain the serialised index exactly, with no leading + or trailing space. Furthermore, its contents must be stored 64-bit-aligned. 
+ + The contents of the byte string may be directly used as the backing memory + for the constructed index. Currently, this is done for compact indexes. + + For deserialising numbers, the endianness of the host system is used. If + serialisation has been done with a different endianness, this mismatch is + detected by looking at the type–version indicator. +-} +fromSBS :: IndexType -> ShortByteString -> Either String (NumEntries, Index) +fromSBS Compact input = second CompactIndex <$> Compact.fromSBS input +fromSBS Ordinary input = second OrdinaryIndex <$> Ordinary.fromSBS input + +-- * Index accumulators + +{-| + The type of supported index accumulators, where an index accumulator denotes + an index under incremental construction. Incremental index construction is only guaranteed to work correctly when the following conditions are met: @@ -95,37 +146,56 @@ class Index i where (Currently, construction of compact indexes needs the former and construction of ordinary indexes needs the latter bound.) -} -class Index (ResultingIndex j) => IndexAcc j where - - -- | The type of indexes constructed by accumulators of a certain type - type ResultingIndex j - - {-| - Adds information about a single page that fully comprises one or more - key–value pairs to an index and outputs newly available chunks. - - See the documentation of the 'IndexAcc' class for constraints to adhere - to. - -} - appendSingle :: (SerialisedKey, SerialisedKey) -> j s -> ST s (Maybe Chunk) - - {-| - Adds information about multiple pages that together comprise a single - key–value pair to an index and outputs newly available chunks. - - The provided 'Word32' value denotes the number of /overflow/ pages, so - that the number of pages that comprise the key–value pair is the - successor of that number. - - See the documentation of the 'IndexAcc' class for constraints to adhere - to. 
- -} - appendMulti :: (SerialisedKey, Word32) -> j s -> ST s [Chunk] - - {-| - Returns the constructed index, along with a final chunk in case the - serialised key list has not been fully output yet, thereby invalidating - the index under construction. Executing @unsafeEnd index@ is only safe - when @index@ is not used afterwards. - -} - unsafeEnd :: j s -> ST s (Maybe Chunk, ResultingIndex j) +data IndexAcc s = CompactIndexAcc (IndexCompactAcc s) + | OrdinaryIndexAcc (IndexOrdinaryAcc s) + +-- | Creates a new index accumulator, using a default configuration. +newWithDefaults :: IndexType -> ST s (IndexAcc s) +newWithDefaults Compact = CompactIndexAcc <$> Compact.newWithDefaults +newWithDefaults Ordinary = OrdinaryIndexAcc <$> Ordinary.newWithDefaults + +{-| + Adds information about a single page that fully comprises one or more + key–value pairs to an index and outputs newly available chunks. + + See the documentation of the 'IndexAcc' type for constraints to adhere to. +-} +appendSingle :: (SerialisedKey, SerialisedKey) + -> IndexAcc s + -> ST s (Maybe Chunk) +appendSingle pageInfo (CompactIndexAcc indexAcc) = Compact.appendSingle + pageInfo + indexAcc +appendSingle pageInfo (OrdinaryIndexAcc indexAcc) = Ordinary.appendSingle + pageInfo + indexAcc + +{-| + Adds information about multiple pages that together comprise a single + key–value pair to an index and outputs newly available chunks. + + The provided 'Word32' value denotes the number of /overflow/ pages, so that + the number of pages that comprise the key–value pair is the successor of + that number. + + See the documentation of the 'IndexAcc' type for constraints to adhere to. 
+-} +appendMulti :: (SerialisedKey, Word32) -> IndexAcc s -> ST s [Chunk] +appendMulti pagesInfo (CompactIndexAcc indexAcc) = Compact.appendMulti + pagesInfo + indexAcc +appendMulti pagesInfo (OrdinaryIndexAcc indexAcc) = Ordinary.appendMulti + pagesInfo + indexAcc + +{-| + Returns the constructed index, along with a final chunk in case the + serialised key list has not been fully output yet, thereby invalidating the + index under construction. Executing @unsafeEnd index@ is only safe when + @index@ is not used afterwards. +-} +unsafeEnd :: IndexAcc s -> ST s (Maybe Chunk, Index) +unsafeEnd (CompactIndexAcc indexAcc) = second CompactIndex <$> + Compact.unsafeEnd indexAcc +unsafeEnd (OrdinaryIndexAcc indexAcc) = second OrdinaryIndex <$> + Ordinary.unsafeEnd indexAcc diff --git a/src/Database/LSMTree/Internal/Index/Compact.hs b/src/Database/LSMTree/Internal/Index/Compact.hs index fb0c75ef6..196cad6ae 100644 --- a/src/Database/LSMTree/Internal/Index/Compact.hs +++ b/src/Database/LSMTree/Internal/Index/Compact.hs @@ -1,5 +1,3 @@ -{-# LANGUAGE MagicHash #-} - -- | A compact fence-pointer index for uniformly distributed keys. -- -- TODO: add utility functions for clash probability calculations @@ -8,18 +6,18 @@ module Database.LSMTree.Internal.Index.Compact ( -- $compact IndexCompact (..) 
-- * Queries - , Index.search - , Index.sizeInPages + , search + , sizeInPages , countClashes , hasClashes -- * Non-incremental serialisation , toLBS -- * Incremental serialisation - , Index.headerLBS - , Index.finalLBS + , headerLBS + , finalLBS , word64VectorToChunk -- * Deserialisation - , Index.fromSBS + , fromSBS ) where import Control.DeepSeq (NFData (..)) @@ -49,14 +47,10 @@ import Database.LSMTree.Internal.ByteString (byteArrayFromTo) import Database.LSMTree.Internal.Chunk (Chunk (Chunk)) import qualified Database.LSMTree.Internal.Chunk as Chunk (toByteString) import Database.LSMTree.Internal.Entry (NumEntries (..)) -import Database.LSMTree.Internal.Index (Index) -import qualified Database.LSMTree.Internal.Index as Index (finalLBS, fromSBS, - headerLBS, search, sizeInPages) import Database.LSMTree.Internal.Page import Database.LSMTree.Internal.Serialise import Database.LSMTree.Internal.Unsliced import Database.LSMTree.Internal.Vector -import GHC.Exts (Proxy#, proxy#) {- $compact @@ -385,7 +379,7 @@ instance NFData IndexCompact where {-| For a specification of this operation, see the documentation of [its - polymorphic version]('Index.search'). + type-agnostic version]('Database.LSMTree.Internal.Index.search'). See [an informal description of the search algorithm](#search-descr) for more details about the search algorithm. @@ -450,7 +444,7 @@ hasClashes = not . Map.null . icTieBreaker {-| For a specification of this operation, see the documentation of [its - polymorphic version]('Index.sizeInPages'). + type-agnostic version]('Database.LSMTree.Internal.Index.sizeInPages'). -} sizeInPages :: IndexCompact -> NumPages sizeInPages = NumPages . toEnum . VU.length . icPrimary @@ -462,7 +456,7 @@ sizeInPages = NumPages . toEnum . VU.length . icPrimary -- | Serialises a compact index in one go. 
toLBS :: NumEntries -> IndexCompact -> LBS.ByteString toLBS numEntries index = - headerLBS (proxy# @IndexCompact) + headerLBS <> LBS.fromStrict (Chunk.toByteString (word64VectorToChunk (icPrimary index))) <> finalLBS numEntries index @@ -478,17 +472,17 @@ supportedTypeAndVersion = 0x0001 {-| For a specification of this operation, see the documentation of [its - polymorphic version]('Index.headerLBS'). + type-agnostic version]('Database.LSMTree.Internal.Index.headerLBS'). -} -headerLBS :: Proxy# IndexCompact -> LBS.ByteString -headerLBS _ = +headerLBS :: LBS.ByteString +headerLBS = -- create a single 4 byte chunk BB.toLazyByteStringWith (BB.safeStrategy 4 BB.smallChunkSize) mempty $ BB.word32Host supportedTypeAndVersion <> BB.word32Host 0 {-| For a specification of this operation, see the documentation of [its - polymorphic version]('Index.finalLBS'). + type-agnostic version]('Database.LSMTree.Internal.Index.finalLBS'). -} finalLBS :: NumEntries -> IndexCompact -> LBS.ByteString finalLBS (NumEntries numEntries) IndexCompact {..} = @@ -554,7 +548,7 @@ putPaddingTo64 written {-| For a specification of this operation, see the documentation of [its - polymorphic version]('Index.fromSBS'). + type-agnostic version]('Database.LSMTree.Internal.Index.fromSBS'). 
-} fromSBS :: ShortByteString -> Either String (NumEntries, IndexCompact) fromSBS (SBS ba') = do @@ -676,27 +670,6 @@ checkedBitVec off len ba | otherwise = Nothing -{------------------------------------------------------------------------------- - Type class instantiation --------------------------------------------------------------------------------} - -instance Index IndexCompact where - - search :: SerialisedKey -> IndexCompact -> PageSpan - search = search - - sizeInPages :: IndexCompact -> NumPages - sizeInPages = sizeInPages - - headerLBS :: Proxy# IndexCompact -> LBS.ByteString - headerLBS = headerLBS - - finalLBS :: NumEntries -> IndexCompact -> LBS.ByteString - finalLBS = finalLBS - - fromSBS :: ShortByteString -> Either String (NumEntries, IndexCompact) - fromSBS = fromSBS - {------------------------------------------------------------------------------- Vector extras -------------------------------------------------------------------------------} diff --git a/src/Database/LSMTree/Internal/Index/CompactAcc.hs b/src/Database/LSMTree/Internal/Index/CompactAcc.hs index 795d628b8..31f964d0a 100644 --- a/src/Database/LSMTree/Internal/Index/CompactAcc.hs +++ b/src/Database/LSMTree/Internal/Index/CompactAcc.hs @@ -1,5 +1,4 @@ -{-# LANGUAGE CPP #-} -{-# LANGUAGE TypeFamilies #-} +{-# LANGUAGE CPP #-} {- | Incremental construction of a compact index yields chunks of the primary array that can be serialised incrementally. @@ -14,9 +13,10 @@ module Database.LSMTree.Internal.Index.CompactAcc ( -- * Construction IndexCompactAcc (..) , new - , Index.appendSingle - , Index.appendMulti - , Index.unsafeEnd + , newWithDefaults + , appendSingle + , appendMulti + , unsafeEnd -- * Internal: exported for testing and benchmarking , SMaybe (..) 
, unsafeWriteRange @@ -45,9 +45,6 @@ import qualified Data.Vector.Unboxed.Mutable as VUM import Data.Word import Database.LSMTree.Internal.BitMath import Database.LSMTree.Internal.Chunk (Chunk) -import Database.LSMTree.Internal.Index (IndexAcc, ResultingIndex) -import qualified Database.LSMTree.Internal.Index as Index (appendMulti, - appendSingle, unsafeEnd) import Database.LSMTree.Internal.Index.Compact import Database.LSMTree.Internal.Page import Database.LSMTree.Internal.Serialise @@ -118,7 +115,14 @@ newPinnedMVec64 lenWords = do {-| For a specification of this operation, see the documentation of [its - polymorphic version]('Index.appendSingle'). + type-agnostic version]('Database.LSMTree.Internal.Index.newWithDefaults'). +-} +newWithDefaults :: ST s (IndexCompactAcc s) +newWithDefaults = new 1024 + +{-| + For a specification of this operation, see the documentation of [its + type-agnostic version]('Database.LSMTree.Internal.Index.appendSingle'). -} appendSingle :: forall s. (SerialisedKey, SerialisedKey) -> IndexCompactAcc s -> ST s (Maybe Chunk) appendSingle (minKey, maxKey) ica@IndexCompactAcc{..} = do @@ -169,7 +173,7 @@ appendSingle (minKey, maxKey) ica@IndexCompactAcc{..} = do {-| For a specification of this operation, see the documentation of [its - polymorphic version]('Index.appendMulti'). + type-agnostic version]('Database.LSMTree.Internal.Index.appendMulti'). -} appendMulti :: forall s. (SerialisedKey, Word32) -> IndexCompactAcc s -> ST s [Chunk] appendMulti (k, n0) ica@IndexCompactAcc{..} = @@ -217,7 +221,7 @@ yield IndexCompactAcc{..} = do {-| For a specification of this operation, see the documentation of [its - polymorphic version]('Index.unsafeEnd'). + type-agnostic version]('Database.LSMTree.Internal.Index.unsafeEnd'). 
-} unsafeEnd :: IndexCompactAcc s -> ST s (Maybe Chunk, IndexCompact) unsafeEnd IndexCompactAcc{..} = do @@ -249,26 +253,6 @@ unsafeEnd IndexCompactAcc{..} = do | ix == 0 = cs -- current chunk is completely empty, just ignore it | otherwise = VUM.slice 0 ix c : cs -{------------------------------------------------------------------------------- - Type class instantiation --------------------------------------------------------------------------------} - -instance IndexAcc IndexCompactAcc where - - type ResultingIndex IndexCompactAcc = IndexCompact - - appendSingle :: (SerialisedKey, SerialisedKey) - -> IndexCompactAcc s - -> ST s (Maybe Chunk) - appendSingle = appendSingle - - appendMulti :: (SerialisedKey, Word32) -> IndexCompactAcc s -> ST s [Chunk] - appendMulti = appendMulti - - unsafeEnd :: IndexCompactAcc s - -> ST s (Maybe Chunk, ResultingIndex IndexCompactAcc) - unsafeEnd = unsafeEnd - {------------------------------------------------------------------------------- Strict 'Maybe' -------------------------------------------------------------------------------} diff --git a/src/Database/LSMTree/Internal/Index/Ordinary.hs b/src/Database/LSMTree/Internal/Index/Ordinary.hs index ca8b02e24..8f0d00a72 100644 --- a/src/Database/LSMTree/Internal/Index/Ordinary.hs +++ b/src/Database/LSMTree/Internal/Index/Ordinary.hs @@ -1,5 +1,3 @@ -{-# LANGUAGE MagicHash #-} - {- HLINT ignore "Avoid restricted alias" -} -- | A general-purpose fence pointer index. 
@@ -9,12 +7,15 @@ module Database.LSMTree.Internal.Index.Ordinary toLastKeys, search, sizeInPages, + headerLBS, + finalLBS, fromSBS ) where import Prelude hiding (drop, last, length) +import Control.DeepSeq (NFData (rnf)) import Control.Exception (assert) import Control.Monad (when) import Data.ByteString.Builder (toLazyByteString) @@ -31,14 +32,11 @@ import qualified Data.Vector.Primitive as Primitive (Vector (Vector), drop, import Data.Word (Word16, Word32, Word64, Word8, byteSwap32) import Database.LSMTree.Internal.Entry (NumEntries (NumEntries), unNumEntries) -import Database.LSMTree.Internal.Index - (Index (finalLBS, fromSBS, headerLBS, search, sizeInPages)) import Database.LSMTree.Internal.Page (NumPages (NumPages), PageNo (PageNo), PageSpan (PageSpan)) import Database.LSMTree.Internal.Serialise (SerialisedKey (SerialisedKey')) import Database.LSMTree.Internal.Vector (binarySearchL, mkPrimVector) -import GHC.Exts (Proxy#) {-| The type–version indicator for the ordinary index and its serialisation @@ -66,137 +64,157 @@ supportedTypeAndVersion = 0x0101 newtype IndexOrdinary = IndexOrdinary (Vector SerialisedKey) deriving stock (Eq, Show) +instance NFData IndexOrdinary where + + rnf (IndexOrdinary lastKeys) = rnf lastKeys + toLastKeys :: IndexOrdinary -> Vector SerialisedKey toLastKeys (IndexOrdinary lastKeys) = lastKeys -instance Index IndexOrdinary where - - search :: SerialisedKey -> IndexOrdinary -> PageSpan - search key (IndexOrdinary lastKeys) = assert (pageCount > 0) result where - - protoStart :: Int - !protoStart = binarySearchL lastKeys key - - pageCount :: Int - !pageCount = length lastKeys - - result :: PageSpan - !result | protoStart < pageCount - = let - - end :: Int - !end = maybe (pred pageCount) (+ protoStart) $ - findIndex (/= lastKeys ! 
protoStart) $ - drop (succ protoStart) lastKeys - - in PageSpan (PageNo $ protoStart) - (PageNo $ end) - | otherwise - = let - - start :: Int - !start = maybe 0 succ $ - findIndexR (/= last lastKeys) $ - lastKeys - - in PageSpan (PageNo $ start) - (PageNo $ pred pageCount) - - sizeInPages :: IndexOrdinary -> NumPages - sizeInPages (IndexOrdinary lastKeys) - = NumPages $ fromIntegral (length lastKeys) - - headerLBS :: Proxy# IndexOrdinary -> LazyByteString - headerLBS _ = toLazyByteString $ - word32Host $ - supportedTypeAndVersion - - finalLBS :: NumEntries -> IndexOrdinary -> LazyByteString - finalLBS entryCount _ = toLazyByteString $ - word64Host $ - fromIntegral $ - unNumEntries $ - entryCount - - fromSBS :: ShortByteString -> Either String (NumEntries, IndexOrdinary) - fromSBS shortByteString@(SBS unliftedByteArray) - | fullSize < 12 - = Left "Doesn't contain header and footer" - | typeAndVersion == byteSwap32 supportedTypeAndVersion - = Left "Non-matching endianness" - | typeAndVersion /= supportedTypeAndVersion - = Left "Unsupported type or version" - | otherwise - = (,) <$> entryCount <*> index - where +{-| + For a specification of this operation, see the documentation of [its + type-agnostic version]('Database.LSMTree.Internal.Index.search'). +-} +search :: SerialisedKey -> IndexOrdinary -> PageSpan +search key (IndexOrdinary lastKeys) = assert (pageCount > 0) result where - fullSize :: Int - fullSize = ShortByteString.length shortByteString + protoStart :: Int + !protoStart = binarySearchL lastKeys key - byteArray :: ByteArray - byteArray = ByteArray unliftedByteArray + pageCount :: Int + !pageCount = length lastKeys - fullBytes :: Primitive.Vector Word8 - fullBytes = mkPrimVector 0 fullSize byteArray + result :: PageSpan + !result | protoStart < pageCount + = let - typeAndVersion :: Word32 - typeAndVersion = indexByteArray byteArray 0 + end :: Int + !end = maybe (pred pageCount) (+ protoStart) $ + findIndex (/= lastKeys ! 
protoStart) $ + drop (succ protoStart) lastKeys - postTypeAndVersionBytes :: Primitive.Vector Word8 - postTypeAndVersionBytes = Primitive.drop 4 fullBytes + in PageSpan (PageNo $ protoStart) + (PageNo $ end) + | otherwise + = let - lastKeysBytes, entryCountBytes :: Primitive.Vector Word8 - (lastKeysBytes, entryCountBytes) - = Primitive.splitAt (fullSize - 12) postTypeAndVersionBytes + start :: Int + !start = maybe 0 succ $ + findIndexR (/= last lastKeys) $ + lastKeys - entryCount :: Either String NumEntries - entryCount - | toInteger asWord64 > toInteger (maxBound :: Int) - = Left "Number of entries not representable as Int" - | otherwise - = Right (NumEntries (fromIntegral asWord64)) - where + in PageSpan (PageNo $ start) + (PageNo $ pred pageCount) + +{-| + For a specification of this operation, see the documentation of [its + type-agnostic version]('Database.LSMTree.Internal.Index.sizeInPages'). +-} +sizeInPages :: IndexOrdinary -> NumPages +sizeInPages (IndexOrdinary lastKeys) = NumPages $ fromIntegral (length lastKeys) - asWord64 :: Word64 - asWord64 = indexByteArray entryCountRep 0 +{-| + For a specification of this operation, see the documentation of [its + type-agnostic version]('Database.LSMTree.Internal.Index.headerLBS'). +-} +headerLBS :: LazyByteString +headerLBS = toLazyByteString $ + word32Host $ + supportedTypeAndVersion + +{-| + For a specification of this operation, see the documentation of [its + type-agnostic version]('Database.LSMTree.Internal.Index.finalLBS'). +-} +finalLBS :: NumEntries -> IndexOrdinary -> LazyByteString +finalLBS entryCount _ = toLazyByteString $ + word64Host $ + fromIntegral $ + unNumEntries $ + entryCount + +{-| + For a specification of this operation, see the documentation of [its + type-agnostic version]('Database.LSMTree.Internal.Index.fromSBS'). 
+-} +fromSBS :: ShortByteString -> Either String (NumEntries, IndexOrdinary) +fromSBS shortByteString@(SBS unliftedByteArray) + | fullSize < 12 + = Left "Doesn't contain header and footer" + | typeAndVersion == byteSwap32 supportedTypeAndVersion + = Left "Non-matching endianness" + | typeAndVersion /= supportedTypeAndVersion + = Left "Unsupported type or version" + | otherwise + = (,) <$> entryCount <*> index + where + + fullSize :: Int + fullSize = ShortByteString.length shortByteString + + byteArray :: ByteArray + byteArray = ByteArray unliftedByteArray + + fullBytes :: Primitive.Vector Word8 + fullBytes = mkPrimVector 0 fullSize byteArray + + typeAndVersion :: Word32 + typeAndVersion = indexByteArray byteArray 0 + + postTypeAndVersionBytes :: Primitive.Vector Word8 + postTypeAndVersionBytes = Primitive.drop 4 fullBytes + + lastKeysBytes, entryCountBytes :: Primitive.Vector Word8 + (lastKeysBytes, entryCountBytes) + = Primitive.splitAt (fullSize - 12) postTypeAndVersionBytes + + entryCount :: Either String NumEntries + entryCount | toInteger asWord64 > toInteger (maxBound :: Int) + = Left "Number of entries not representable as Int" + | otherwise + = Right (NumEntries (fromIntegral asWord64)) + where - entryCountRep :: ByteArray - Primitive.Vector _ _ entryCountRep = Primitive.force entryCountBytes + asWord64 :: Word64 + asWord64 = indexByteArray entryCountRep 0 - index :: Either String IndexOrdinary - index = IndexOrdinary <$> fromList <$> lastKeys lastKeysBytes + entryCountRep :: ByteArray + Primitive.Vector _ _ entryCountRep = Primitive.force entryCountBytes - lastKeys :: Primitive.Vector Word8 -> Either String [SerialisedKey] - lastKeys bytes - | Primitive.null bytes - = Right [] - | otherwise - = do - when (Primitive.length bytes < 2) - (Left "Too few bytes for key size") - let + index :: Either String IndexOrdinary + index = IndexOrdinary <$> fromList <$> lastKeys lastKeysBytes + + lastKeys :: Primitive.Vector Word8 -> Either String [SerialisedKey] + 
lastKeys bytes + | Primitive.null bytes + = Right [] + | otherwise + = do + when (Primitive.length bytes < 2) + (Left "Too few bytes for key size") + let - firstSizeRep :: ByteArray - Primitive.Vector _ _ firstSizeRep - = Primitive.force (Primitive.take 2 bytes) + firstSizeRep :: ByteArray + Primitive.Vector _ _ firstSizeRep + = Primitive.force (Primitive.take 2 bytes) - firstSize :: Int - firstSize = fromIntegral $ - (indexByteArray firstSizeRep 0 :: Word16) + firstSize :: Int + firstSize = fromIntegral $ + (indexByteArray firstSizeRep 0 :: Word16) - postFirstSizeBytes :: Primitive.Vector Word8 - postFirstSizeBytes = Primitive.drop 2 bytes + postFirstSizeBytes :: Primitive.Vector Word8 + postFirstSizeBytes = Primitive.drop 2 bytes - when (Primitive.length postFirstSizeBytes < firstSize) - (Left "Too few bytes for key") - let + when (Primitive.length postFirstSizeBytes < firstSize) + (Left "Too few bytes for key") + let - firstBytes, othersBytes :: Primitive.Vector Word8 - (firstBytes, othersBytes) - = Primitive.splitAt firstSize postFirstSizeBytes + firstBytes, othersBytes :: Primitive.Vector Word8 + (firstBytes, othersBytes) + = Primitive.splitAt firstSize postFirstSizeBytes - first :: SerialisedKey - first = SerialisedKey' (Primitive.force firstBytes) + first :: SerialisedKey + first = SerialisedKey' (Primitive.force firstBytes) - others <- lastKeys othersBytes - return (first : others) + others <- lastKeys othersBytes + return (first : others) diff --git a/src/Database/LSMTree/Internal/Index/OrdinaryAcc.hs b/src/Database/LSMTree/Internal/Index/OrdinaryAcc.hs index b8b454237..4dce90ba7 100644 --- a/src/Database/LSMTree/Internal/Index/OrdinaryAcc.hs +++ b/src/Database/LSMTree/Internal/Index/OrdinaryAcc.hs @@ -1,5 +1,3 @@ -{-# LANGUAGE TypeFamilies #-} - {- HLINT ignore "Avoid restricted alias" -} {-| @@ -8,8 +6,9 @@ -} module Database.LSMTree.Internal.Index.OrdinaryAcc ( - IndexOrdinaryAcc, + IndexOrdinaryAcc (IndexOrdinaryAcc), new, + newWithDefaults, 
appendSingle, appendMulti, unsafeEnd @@ -25,8 +24,6 @@ import qualified Data.Vector.Primitive as Primitive (Vector, length) import Data.Word (Word16, Word32, Word8) import Database.LSMTree.Internal.Chunk (Baler, Chunk, createBaler, feedBaler, unsafeEndBaler) -import Database.LSMTree.Internal.Index - (IndexAcc (ResultingIndex, appendMulti, appendSingle, unsafeEnd)) import Database.LSMTree.Internal.Index.Ordinary (IndexOrdinary (IndexOrdinary)) import Database.LSMTree.Internal.Serialise @@ -55,6 +52,13 @@ new initialKeyBufferSize minChunkSize = IndexOrdinaryAcc <$> Growing.new initialKeyBufferSize <*> createBaler minChunkSize +{-| + For a specification of this operation, see the documentation of [its + type-agnostic version]('Database.LSMTree.Internal.Index.newWithDefaults'). +-} +newWithDefaults :: ST s (IndexOrdinaryAcc s) +newWithDefaults = new 1024 4096 + -- Yields the serialisation of an element of a key list. keyListElem :: SerialisedKey -> [Primitive.Vector Word8] keyListElem (SerialisedKey' keyBytes) = [keySizeBytes, keyBytes] where @@ -69,35 +73,43 @@ keyListElem (SerialisedKey' keyBytes) = [keySizeBytes, keyBytes] where keySizeBytes :: Primitive.Vector Word8 !keySizeBytes = byteVectorFromPrim keySizeAsWord16 -instance IndexAcc IndexOrdinaryAcc where - - type ResultingIndex IndexOrdinaryAcc = IndexOrdinary - - appendSingle :: (SerialisedKey, SerialisedKey) - -> IndexOrdinaryAcc s - -> ST s (Maybe Chunk) - appendSingle (_, key) (IndexOrdinaryAcc lastKeys baler) - = do - Growing.append lastKeys 1 key - feedBaler (keyListElem key) baler - - appendMulti :: (SerialisedKey, Word32) - -> IndexOrdinaryAcc s - -> ST s [Chunk] - appendMulti (key, overflowPageCount) (IndexOrdinaryAcc lastKeys baler) - = do - Growing.append lastKeys pageCount key - maybeToList <$> feedBaler keyListElems baler - where - - pageCount :: Int - !pageCount = succ (fromIntegral overflowPageCount) - - keyListElems :: [Primitive.Vector Word8] - keyListElems = concat (replicate pageCount 
(keyListElem key)) - - unsafeEnd :: IndexOrdinaryAcc s -> ST s (Maybe Chunk, IndexOrdinary) - unsafeEnd (IndexOrdinaryAcc lastKeys baler) = do - keys <- Growing.freeze lastKeys - remnant <- unsafeEndBaler baler - return (remnant, IndexOrdinary keys) +{-| + For a specification of this operation, see the documentation of [its + type-agnostic version]('Database.LSMTree.Internal.Index.appendSingle'). +-} +appendSingle :: (SerialisedKey, SerialisedKey) + -> IndexOrdinaryAcc s + -> ST s (Maybe Chunk) +appendSingle (_, key) (IndexOrdinaryAcc lastKeys baler) + = do + Growing.append lastKeys 1 key + feedBaler (keyListElem key) baler + +{-| + For a specification of this operation, see the documentation of [its + type-agnostic version]('Database.LSMTree.Internal.Index.appendMulti'). +-} +appendMulti :: (SerialisedKey, Word32) + -> IndexOrdinaryAcc s + -> ST s [Chunk] +appendMulti (key, overflowPageCount) (IndexOrdinaryAcc lastKeys baler) + = do + Growing.append lastKeys pageCount key + maybeToList <$> feedBaler keyListElems baler + where + + pageCount :: Int + !pageCount = succ (fromIntegral overflowPageCount) + + keyListElems :: [Primitive.Vector Word8] + keyListElems = concat (replicate pageCount (keyListElem key)) + +{-| + For a specification of this operation, see the documentation of [its + type-agnostic version]('Database.LSMTree.Internal.Index.unsafeEnd'). 
+-} +unsafeEnd :: IndexOrdinaryAcc s -> ST s (Maybe Chunk, IndexOrdinary) +unsafeEnd (IndexOrdinaryAcc lastKeys baler) = do + keys <- Growing.freeze lastKeys + remnant <- unsafeEndBaler baler + return (remnant, IndexOrdinary keys) diff --git a/src/Database/LSMTree/Internal/Lookup.hs b/src/Database/LSMTree/Internal/Lookup.hs index 5582b17bb..bbbb860b8 100644 --- a/src/Database/LSMTree/Internal/Lookup.hs +++ b/src/Database/LSMTree/Internal/Lookup.hs @@ -41,8 +41,8 @@ import Control.RefCount import Database.LSMTree.Internal.BlobRef (WeakBlobRef (..)) import Database.LSMTree.Internal.Entry +import Database.LSMTree.Internal.Index (Index) import qualified Database.LSMTree.Internal.Index as Index (search) -import Database.LSMTree.Internal.Index.Compact (IndexCompact) import Database.LSMTree.Internal.Page (PageSpan (..), getNumPages, pageSpanSize, unPageNo) import Database.LSMTree.Internal.RawBytes (RawBytes (..)) @@ -72,7 +72,7 @@ import Database.LSMTree.Internal.BloomFilterQuery1 (RunIxKeyIx (..), prepLookups :: Arena s -> V.Vector (Bloom SerialisedKey) - -> V.Vector IndexCompact + -> V.Vector Index -> V.Vector (Handle h) -> V.Vector SerialisedKey -> ST s (VP.Vector RunIxKeyIx, V.Vector (IOOp s h)) @@ -90,7 +90,7 @@ type RunIx = Int -- positive search result. 
indexSearches :: Arena s - -> V.Vector IndexCompact + -> V.Vector Index -> V.Vector (Handle h) -> V.Vector SerialisedKey -> VP.Vector RunIxKeyIx -- ^ Result of 'bloomQueries' @@ -162,7 +162,7 @@ data ByteCountDiscrepancy = ByteCountDiscrepancy { -> Ref (WBB.WriteBufferBlobs IO h) -> V.Vector (Ref (Run IO h)) -> V.Vector (Bloom SerialisedKey) - -> V.Vector IndexCompact + -> V.Vector Index -> V.Vector (Handle h) -> V.Vector SerialisedKey -> IO (V.Vector (Maybe (Entry SerialisedValue (WeakBlobRef IO h)))) @@ -182,7 +182,7 @@ lookupsIO :: -> Ref (WBB.WriteBufferBlobs m h) -> V.Vector (Ref (Run m h)) -- ^ Runs @rs@ -> V.Vector (Bloom SerialisedKey) -- ^ The bloom filters inside @rs@ - -> V.Vector IndexCompact -- ^ The indexes inside @rs@ + -> V.Vector Index -- ^ The indexes inside @rs@ -> V.Vector (Handle h) -- ^ The file handles to the key\/value files inside @rs@ -> V.Vector SerialisedKey -> m (V.Vector (Maybe (Entry SerialisedValue (WeakBlobRef m h)))) diff --git a/src/Database/LSMTree/Internal/Merge.hs b/src/Database/LSMTree/Internal/Merge.hs index 1716cd323..568e4c5a3 100644 --- a/src/Database/LSMTree/Internal/Merge.hs +++ b/src/Database/LSMTree/Internal/Merge.hs @@ -28,6 +28,7 @@ import Data.Traversable (for) import qualified Data.Vector as V import Database.LSMTree.Internal.BlobRef (RawBlobRef) import Database.LSMTree.Internal.Entry +import Database.LSMTree.Internal.Index (IndexType) import Database.LSMTree.Internal.Run (Run, RunDataCaching) import qualified Database.LSMTree.Internal.Run as Run import Database.LSMTree.Internal.RunAcc (RunBloomFilterAlloc (..)) @@ -99,6 +100,7 @@ type Mappend = SerialisedValue -> SerialisedValue -> SerialisedValue -> HasBlockIO IO h -> RunDataCaching -> RunBloomFilterAlloc + -> IndexType -> MergeType -> Mappend -> Run.RunFsPaths @@ -112,18 +114,19 @@ new :: -> HasBlockIO m h -> RunDataCaching -> RunBloomFilterAlloc + -> IndexType -> MergeType -> Mappend -> Run.RunFsPaths -> V.Vector (Ref (Run m h)) -> m (Maybe (Merge m h)) -new fs 
hbio mergeCaching alloc mergeType mergeMappend targetPaths runs = do +new fs hbio mergeCaching alloc indexType mergeType mergeMappend targetPaths runs = do -- no offset, no write buffer mreaders <- Readers.new Readers.NoOffsetKey Nothing runs for mreaders $ \mergeReaders -> do -- calculate upper bounds based on input runs let numEntries = V.foldMap' Run.size runs - mergeBuilder <- Builder.new fs hbio targetPaths numEntries alloc + mergeBuilder <- Builder.new fs hbio targetPaths numEntries alloc indexType mergeState <- newMutVar $! Merging return Merge { mergeHasFS = fs diff --git a/src/Database/LSMTree/Internal/MergeSchedule.hs b/src/Database/LSMTree/Internal/MergeSchedule.hs index fc32cf817..9a8a4a642 100644 --- a/src/Database/LSMTree/Internal/MergeSchedule.hs +++ b/src/Database/LSMTree/Internal/MergeSchedule.hs @@ -47,7 +47,7 @@ import Database.LSMTree.Internal.Assertions (assert) import Database.LSMTree.Internal.Config import Database.LSMTree.Internal.Entry (Entry, NumEntries (..), unNumEntries) -import Database.LSMTree.Internal.Index.Compact (IndexCompact) +import Database.LSMTree.Internal.Index (Index) import Database.LSMTree.Internal.Lookup (ResolveSerialisedValue) import Database.LSMTree.Internal.Merge (MergeType (..)) import Database.LSMTree.Internal.MergingRun (MergingRun, NumRuns (..)) @@ -174,7 +174,7 @@ releaseTableContent reg (TableContent _wb wbb levels cache ul) = do data LevelsCache m h = LevelsCache_ { cachedRuns :: !(V.Vector (Ref (Run m h))) , cachedFilters :: !(V.Vector (Bloom SerialisedKey)) - , cachedIndexes :: !(V.Vector IndexCompact) + , cachedIndexes :: !(V.Vector Index) , cachedKOpsFiles :: !(V.Vector (FS.Handle h)) } @@ -569,21 +569,23 @@ flushWriteBuffer :: -> ActionRegistry m -> TableContent m h -> m (TableContent m h) -flushWriteBuffer tr conf@TableConfig{confDiskCachePolicy} +flushWriteBuffer tr conf@TableConfig{confFencePointerIndex, confDiskCachePolicy} resolve hfs hbio root uc reg tc | WB.null (tableWriteBuffer tc) = pure tc | 
otherwise = do !n <- incrUniqCounter uc - let !size = WB.numEntries (tableWriteBuffer tc) - !l = LevelNo 1 - !cache = diskCachePolicyForLevel confDiskCachePolicy l - !alloc = bloomFilterAllocForLevel conf l - !path = Paths.runPath root (uniqueToRunNumber n) + let !size = WB.numEntries (tableWriteBuffer tc) + !l = LevelNo 1 + !cache = diskCachePolicyForLevel confDiskCachePolicy l + !alloc = bloomFilterAllocForLevel conf l + !indexType = indexTypeForRun confFencePointerIndex + !path = Paths.runPath root (uniqueToRunNumber n) traceWith tr $ AtLevel l $ TraceFlushWriteBuffer size (runNumber path) cache alloc r <- withRollback reg (Run.fromWriteBuffer hfs hbio cache alloc + indexType path (tableWriteBuffer tc) (tableWriteBufferBlobs tc)) @@ -732,13 +734,14 @@ addRunToLevels tr conf@TableConfig{..} resolve hfs hbio root uc r0 reg levels ul !n <- incrUniqCounter uc let !caching = diskCachePolicyForLevel confDiskCachePolicy ln !alloc = bloomFilterAllocForLevel conf ln + !indexType = indexTypeForRun confFencePointerIndex !runPaths = Paths.runPath root (uniqueToRunNumber n) traceWith tr $ AtLevel ln $ TraceNewMerge (V.map Run.size rs) (runNumber runPaths) caching alloc mergePolicy mergeType -- The runs will end up inside the merging run, with fresh references. -- The original references can be released (but only on the happy path). 
mr <- withRollback reg - (MR.new hfs hbio resolve caching alloc mergeType runPaths rs) + (MR.new hfs hbio resolve caching alloc indexType mergeType runPaths rs) releaseRef V.forM_ rs $ \r -> delayedCommit reg (releaseRef r) case confMergeSchedule of diff --git a/src/Database/LSMTree/Internal/MergingRun.hs b/src/Database/LSMTree/Internal/MergingRun.hs index 32c0889ec..13785c55f 100644 --- a/src/Database/LSMTree/Internal/MergingRun.hs +++ b/src/Database/LSMTree/Internal/MergingRun.hs @@ -1,7 +1,6 @@ {-# LANGUAGE CPP #-} {-# LANGUAGE MultiWayIf #-} {-# LANGUAGE PatternSynonyms #-} -{-# LANGUAGE TypeFamilies #-} {- HLINT ignore "Use when" -} @@ -55,6 +54,7 @@ import Data.Primitive.PrimVar import qualified Data.Vector as V import Database.LSMTree.Internal.Assertions (assert) import Database.LSMTree.Internal.Entry (NumEntries (..)) +import Database.LSMTree.Internal.Index (IndexType) import Database.LSMTree.Internal.Lookup (ResolveSerialisedValue) import Database.LSMTree.Internal.Merge (Merge, MergeType (..), StepResult (..)) @@ -123,6 +123,7 @@ instance NFData MergeKnownCompleted where -> ResolveSerialisedValue -> Run.RunDataCaching -> RunBloomFilterAlloc + -> IndexType -> MergeType -> RunFsPaths -> V.Vector (Ref (Run IO h)) @@ -141,16 +142,17 @@ new :: -> ResolveSerialisedValue -> Run.RunDataCaching -> RunBloomFilterAlloc + -> IndexType -> MergeType -> RunFsPaths -> V.Vector (Ref (Run m h)) -> m (Ref (MergingRun m h)) -new hfs hbio resolve caching alloc mergeType runPaths inputRuns = +new hfs hbio resolve caching alloc indexType mergeType runPaths inputRuns = -- If creating the Merge fails, we must release the references again. 
withActionRegistry $ \reg -> do runs <- V.mapM (\r -> withRollback reg (dupRef r) releaseRef) inputRuns merge <- fromMaybe (error "newMerge: merges can not be empty") - <$> Merge.new hfs hbio caching alloc mergeType resolve runPaths runs + <$> Merge.new hfs hbio caching alloc indexType mergeType resolve runPaths runs let numInputRuns = NumRuns $ V.length runs let numInputEntries = V.foldMap' Run.size runs unsafeNew numInputRuns numInputEntries MergeMaybeCompleted $ diff --git a/src/Database/LSMTree/Internal/Run.hs b/src/Database/LSMTree/Internal/Run.hs index 3e00375fa..decd8a0fa 100644 --- a/src/Database/LSMTree/Internal/Run.hs +++ b/src/Database/LSMTree/Internal/Run.hs @@ -3,7 +3,6 @@ {-# LANGUAGE DerivingStrategies #-} {-# LANGUAGE DerivingVia #-} {-# LANGUAGE RecordWildCards #-} -{-# LANGUAGE TypeFamilies #-} -- | Runs of sorted key\/value data. module Database.LSMTree.Internal.Run ( @@ -41,8 +40,8 @@ import qualified Database.LSMTree.Internal.BlobRef as BlobRef import Database.LSMTree.Internal.BloomFilter (bloomFilterFromSBS) import qualified Database.LSMTree.Internal.CRC32C as CRC import Database.LSMTree.Internal.Entry (NumEntries (..)) +import Database.LSMTree.Internal.Index (Index, IndexType) import qualified Database.LSMTree.Internal.Index as Index (fromSBS, sizeInPages) -import Database.LSMTree.Internal.Index.Compact (IndexCompact) import Database.LSMTree.Internal.Page (NumPages) import Database.LSMTree.Internal.Paths as Paths import Database.LSMTree.Internal.RunAcc (RunBloomFilterAlloc) @@ -74,7 +73,7 @@ data Run m h = Run { -- | The in-memory index mapping keys to page numbers in the -- Key\/Ops file. In future we may support alternative index -- representations. - , runIndex :: !IndexCompact + , runIndex :: !Index -- | The file handle for the Key\/Ops file. This file is opened -- read-only and is accessed in a page-oriented way, i.e. only -- reading whole pages, at page offsets. 
It will be opened with @@ -201,6 +200,7 @@ fromMutable runRunDataCaching builder = do -> HasBlockIO IO h -> RunDataCaching -> RunBloomFilterAlloc + -> IndexType -> RunFsPaths -> WriteBuffer -> Ref (WriteBufferBlobs IO h) @@ -218,12 +218,14 @@ fromWriteBuffer :: -> HasBlockIO m h -> RunDataCaching -> RunBloomFilterAlloc + -> IndexType -> RunFsPaths -> WriteBuffer -> Ref (WriteBufferBlobs m h) -> m (Ref (Run m h)) -fromWriteBuffer fs hbio caching alloc fsPaths buffer blobs = do - builder <- Builder.new fs hbio fsPaths (WB.numEntries buffer) alloc +fromWriteBuffer fs hbio caching alloc indexType fsPaths buffer blobs = do + builder <- Builder.new fs hbio fsPaths (WB.numEntries buffer) + alloc indexType for_ (WB.toList buffer) $ \(k, e) -> Builder.addKeyOp builder k (fmap (WBB.mkRawBlobRef blobs) e) --TODO: the fmap entry here reallocates even when there are no blobs @@ -237,6 +239,7 @@ fromWriteBuffer fs hbio caching alloc fsPaths buffer blobs = do HasFS IO h -> HasBlockIO IO h -> RunDataCaching + -> IndexType -> RunFsPaths -> IO (Ref (Run IO h)) #-} -- | Load a previously written run from disk, checking each file's checksum @@ -252,10 +255,11 @@ openFromDisk :: => HasFS m h -> HasBlockIO m h -> RunDataCaching + -> IndexType -> RunFsPaths -> m (Ref (Run m h)) -- TODO: make exception safe -openFromDisk fs hbio runRunDataCaching runRunFsPaths = do +openFromDisk fs hbio runRunDataCaching indexType runRunFsPaths = do expectedChecksums <- CRC.expectValidFile (runChecksumsPath runRunFsPaths) . fromChecksumsFile =<< CRC.readChecksumsFile fs (runChecksumsPath runRunFsPaths) @@ -270,7 +274,7 @@ openFromDisk fs hbio runRunDataCaching runRunFsPaths = do CRC.expectValidFile (forRunFilterRaw paths) . bloomFilterFromSBS =<< readCRC (forRunFilterRaw expectedChecksums) (forRunFilterRaw paths) (runNumEntries, runIndex) <- - CRC.expectValidFile (forRunIndexRaw paths) . Index.fromSBS + CRC.expectValidFile (forRunIndexRaw paths) . 
Index.fromSBS indexType =<< readCRC (forRunIndexRaw expectedChecksums) (forRunIndexRaw paths) runKOpsFile <- FS.hOpen fs (runKOpsPath runRunFsPaths) FS.ReadMode diff --git a/src/Database/LSMTree/Internal/RunAcc.hs b/src/Database/LSMTree/Internal/RunAcc.hs index 0ce5f3f7b..0ffeaff57 100644 --- a/src/Database/LSMTree/Internal/RunAcc.hs +++ b/src/Database/LSMTree/Internal/RunAcc.hs @@ -44,11 +44,9 @@ import Database.LSMTree.Internal.Assertions (fromIntegralChecked) import Database.LSMTree.Internal.BlobRef (BlobSpan (..)) import Database.LSMTree.Internal.Chunk (Chunk) import Database.LSMTree.Internal.Entry (Entry (..), NumEntries (..)) +import Database.LSMTree.Internal.Index (Index, IndexAcc, IndexType) import qualified Database.LSMTree.Internal.Index as Index (appendMulti, - appendSingle, unsafeEnd) -import Database.LSMTree.Internal.Index.Compact (IndexCompact) -import Database.LSMTree.Internal.Index.CompactAcc (IndexCompactAcc) -import qualified Database.LSMTree.Internal.Index.CompactAcc as IndexCompact + appendSingle, newWithDefaults, unsafeEnd) import Database.LSMTree.Internal.PageAcc (PageAcc) import qualified Database.LSMTree.Internal.PageAcc as PageAcc import qualified Database.LSMTree.Internal.PageAcc1 as PageAcc @@ -72,7 +70,7 @@ import qualified Monkey -- 'unsafeFinalise'. data RunAcc s = RunAcc { mbloom :: !(MBloom s SerialisedKey) - , mindex :: !(IndexCompactAcc s) + , mindex :: !(IndexAcc s) , mpageacc :: !(PageAcc s) , entryCount :: !(PrimVar s Int) } @@ -90,8 +88,12 @@ data RunBloomFilterAlloc = -- -- @nentries@ should be an upper bound on the expected number of entries in the -- output run. 
-new :: NumEntries -> RunBloomFilterAlloc -> ST s (RunAcc s) -new (NumEntries nentries) alloc = do +new :: + NumEntries + -> RunBloomFilterAlloc + -> IndexType + -> ST s (RunAcc s) +new (NumEntries nentries) alloc indexType = do mbloom <- case alloc of RunAllocFixed !bitsPerEntry -> let !nbits = fromIntegral bitsPerEntry * fromIntegral nentries @@ -104,7 +106,7 @@ new (NumEntries nentries) alloc = do MBloom.new (fromIntegralChecked $ Monkey.numHashFunctions (fromIntegral nbits) (fromIntegral nentries)) nbits - mindex <- IndexCompact.new 1024 -- TODO(optimise): tune chunk size + mindex <- Index.newWithDefaults indexType mpageacc <- PageAcc.newPageAcc entryCount <- newPrimVar 0 pure RunAcc{..} @@ -120,7 +122,7 @@ unsafeFinalise :: -> ST s ( Maybe RawPage , Maybe Chunk , Bloom SerialisedKey - , IndexCompact + , Index , NumEntries ) unsafeFinalise racc@RunAcc {..} = do diff --git a/src/Database/LSMTree/Internal/RunBuilder.hs b/src/Database/LSMTree/Internal/RunBuilder.hs index eb45fbbf2..6c8e05ce3 100644 --- a/src/Database/LSMTree/Internal/RunBuilder.hs +++ b/src/Database/LSMTree/Internal/RunBuilder.hs @@ -1,5 +1,3 @@ -{-# LANGUAGE MagicHash #-} - -- | A mutable run ('RunBuilder') that is under construction. 
-- module Database.LSMTree.Internal.RunBuilder ( @@ -25,14 +23,13 @@ import Database.LSMTree.Internal.BlobRef (RawBlobRef) import Database.LSMTree.Internal.ChecksumHandle import qualified Database.LSMTree.Internal.CRC32C as CRC import Database.LSMTree.Internal.Entry -import Database.LSMTree.Internal.Index.Compact (IndexCompact) +import Database.LSMTree.Internal.Index (Index, IndexType) import Database.LSMTree.Internal.Paths import Database.LSMTree.Internal.RawOverflowPage (RawOverflowPage) import Database.LSMTree.Internal.RawPage (RawPage) import Database.LSMTree.Internal.RunAcc (RunAcc, RunBloomFilterAlloc) import qualified Database.LSMTree.Internal.RunAcc as RunAcc import Database.LSMTree.Internal.Serialise -import GHC.Exts (proxy#) import qualified System.FS.API as FS import System.FS.API (HasFS) import qualified System.FS.BlockIO.API as FS @@ -74,6 +71,7 @@ data RunBuilder m h = RunBuilder { -> RunFsPaths -> NumEntries -> RunBloomFilterAlloc + -> IndexType -> IO (RunBuilder IO h) #-} -- | Create an 'RunBuilder' to start building a run. -- @@ -85,15 +83,16 @@ new :: -> RunFsPaths -> NumEntries -- ^ an upper bound of the number of entries to be added -> RunBloomFilterAlloc + -> IndexType -> m (RunBuilder m h) -new hfs hbio runBuilderFsPaths numEntries alloc = do - runBuilderAcc <- ST.stToIO $ RunAcc.new numEntries alloc +new hfs hbio runBuilderFsPaths numEntries alloc indexType = do + runBuilderAcc <- ST.stToIO $ RunAcc.new numEntries alloc indexType runBuilderBlobOffset <- newPrimVar 0 runBuilderHandles <- traverse (makeHandle hfs) (pathsForRunFiles runBuilderFsPaths) let builder = RunBuilder { runBuilderHasFS = hfs, runBuilderHasBlockIO = hbio, .. 
} - writeIndexHeader hfs (forRunIndex runBuilderHandles) (proxy# @IndexCompact) + writeIndexHeader hfs (forRunIndex runBuilderHandles) indexType return builder {-# SPECIALISE addKeyOp :: @@ -168,7 +167,7 @@ addLargeSerialisedKeyOp RunBuilder{..} key page overflowPages = do {-# SPECIALISE unsafeFinalise :: Bool -> RunBuilder IO h - -> IO (HasFS IO h, HasBlockIO IO h, RunFsPaths, Bloom SerialisedKey, IndexCompact, NumEntries) #-} + -> IO (HasFS IO h, HasBlockIO IO h, RunFsPaths, Bloom SerialisedKey, Index, NumEntries) #-} -- | Finish construction of the run. -- Writes the filter and index to file and leaves all written files on disk. -- @@ -179,7 +178,7 @@ unsafeFinalise :: (MonadST m, MonadSTM m, MonadThrow m) => Bool -- ^ drop caches -> RunBuilder m h - -> m (HasFS m h, HasBlockIO m h, RunFsPaths, Bloom SerialisedKey, IndexCompact, NumEntries) + -> m (HasFS m h, HasBlockIO m h, RunFsPaths, Bloom SerialisedKey, Index, NumEntries) unsafeFinalise dropCaches RunBuilder {..} = do -- write final bits (mPage, mChunk, runFilter, runIndex, numEntries) <- diff --git a/src/Database/LSMTree/Internal/Snapshot.hs b/src/Database/LSMTree/Internal/Snapshot.hs index 9f0c6b874..7c1e9d327 100644 --- a/src/Database/LSMTree/Internal/Snapshot.hs +++ b/src/Database/LSMTree/Internal/Snapshot.hs @@ -389,7 +389,10 @@ openRuns levels' <- V.iforM levels $ \i level -> let ln = LevelNo (i+1) in - let caching = diskCachePolicyForLevel confDiskCachePolicy ln in + let + caching = diskCachePolicyForLevel confDiskCachePolicy ln + indexType = indexTypeForRun confFencePointerIndex + in for level $ \runNum -> do let sourcePaths = RunFsPaths sourceDir runNum runNum' <- uniqueToRunNumber <$> incrUniqCounter uc @@ -397,7 +400,7 @@ openRuns hardLinkRunFiles reg hfs hbio sourcePaths targetPaths withRollback reg - (Run.openFromDisk hfs hbio caching targetPaths) + (Run.openFromDisk hfs hbio caching indexType targetPaths) releaseRef pure (SnapLevels levels') @@ -449,6 +452,7 @@ fromSnapLevels reg hfs hbio 
conf@TableConfig{..} uc resolve dir (SnapLevels leve where caching = diskCachePolicyForLevel confDiskCachePolicy ln alloc = bloomFilterAllocForLevel conf ln + indexType = indexTypeForRun confFencePointerIndex fromSnapIncomingRun :: SnapIncomingRun (Ref (Run m h)) @@ -464,7 +468,7 @@ fromSnapLevels reg hfs hbio conf@TableConfig{..} uc resolve dir (SnapLevels leve SnapOngoingMerge runs mt -> do rn <- uniqueToRunNumber <$> incrUniqCounter uc mr <- withRollback reg - (MR.new hfs hbio resolve caching alloc mt (mkPath rn) runs) + (MR.new hfs hbio resolve caching alloc indexType mt (mkPath rn) runs) releaseRef -- When a snapshot is created, merge progress is lost, so we -- have to redo merging work here. SuppliedCredits tracks how diff --git a/src/Database/LSMTree/Internal/Vector/Growing.hs b/src/Database/LSMTree/Internal/Vector/Growing.hs index 1fe7e530b..cd091fd20 100644 --- a/src/Database/LSMTree/Internal/Vector/Growing.hs +++ b/src/Database/LSMTree/Internal/Vector/Growing.hs @@ -5,7 +5,7 @@ -- | Vectors with support for appending elements. 
module Database.LSMTree.Internal.Vector.Growing ( - GrowingVector, + GrowingVector (GrowingVector), new, append, freeze diff --git a/test/Main.hs b/test/Main.hs index 200015086..004572347 100644 --- a/test/Main.hs +++ b/test/Main.hs @@ -58,6 +58,8 @@ main = do , Test.Database.LSMTree.Internal.Chunk.tests , Test.Database.LSMTree.Internal.CRC32C.tests , Test.Database.LSMTree.Internal.Entry.tests + , Test.Database.LSMTree.Internal.Index.Compact.tests + , Test.Database.LSMTree.Internal.Index.Ordinary.tests , Test.Database.LSMTree.Internal.Lookup.tests , Test.Database.LSMTree.Internal.Merge.tests , Test.Database.LSMTree.Internal.MergingRun.tests @@ -72,8 +74,6 @@ main = do , Test.Database.LSMTree.Internal.RunBuilder.tests , Test.Database.LSMTree.Internal.RunReader.tests , Test.Database.LSMTree.Internal.RunReaders.tests - , Test.Database.LSMTree.Internal.Index.Compact.tests - , Test.Database.LSMTree.Internal.Index.Ordinary.tests , Test.Database.LSMTree.Internal.Serialise.tests , Test.Database.LSMTree.Internal.Serialise.Class.tests , Test.Database.LSMTree.Internal.Snapshot.Codec.tests diff --git a/test/Test/Database/LSMTree/Internal/Index/Compact.hs b/test/Test/Database/LSMTree/Internal/Index/Compact.hs index 22890d994..61d2a368b 100644 --- a/test/Test/Database/LSMTree/Internal/Index/Compact.hs +++ b/test/Test/Database/LSMTree/Internal/Index/Compact.hs @@ -1,5 +1,4 @@ {-# LANGUAGE LambdaCase #-} -{-# LANGUAGE MagicHash #-} {-# LANGUAGE RecordWildCards #-} {-# OPTIONS_GHC -Wno-orphans #-} {- HLINT ignore "Eta reduce" -} @@ -29,16 +28,15 @@ import qualified Data.Vector.Unboxed.Base as VU import Data.Word import Database.LSMTree.Extras import Database.LSMTree.Extras.Generators as Gen -import Database.LSMTree.Extras.Index as Index (Append (..), append) +import Database.LSMTree.Extras.Index (Append (..), appendToCompact) import Database.LSMTree.Internal.BitMath import Database.LSMTree.Internal.Chunk as Chunk (toByteString) import Database.LSMTree.Internal.Entry (NumEntries 
(..)) -import Database.LSMTree.Internal.Index.Compact as IndexCompact -import Database.LSMTree.Internal.Index.CompactAcc as IndexCompact +import Database.LSMTree.Internal.Index.Compact +import Database.LSMTree.Internal.Index.CompactAcc import Database.LSMTree.Internal.Page (PageNo (PageNo), PageSpan, multiPage, singlePage) import Database.LSMTree.Internal.Serialise -import GHC.Exts (proxy#) import Numeric (showHex) import Prelude hiding (max, min, pi) import qualified Test.QuickCheck as QC @@ -71,9 +69,9 @@ tests = testGroup "Test.Database.LSMTree.Internal.Index.Compact" [ let k2 = SerialisedKey' (VP.replicate 16 0x11) let k3 = SerialisedKey' (VP.replicate 15 0x11 <> VP.replicate 1 0x12) let (chunks, index) = runST $ do - ica <- IndexCompact.new 16 - ch1 <- flip Index.append ica $ AppendSinglePage k1 k2 - ch2 <- flip Index.append ica $ AppendSinglePage k3 k3 + ica <- new 16 + ch1 <- flip appendToCompact ica $ AppendSinglePage k1 k2 + ch2 <- flip appendToCompact ica $ AppendSinglePage k3 k3 (mCh3, idx) <- unsafeEnd ica return (ch1 <> ch2 <> toList mCh3, idx) @@ -101,7 +99,7 @@ tests = testGroup "Test.Database.LSMTree.Internal.Index.Compact" [ , 7, 0 ] - let header = LBS.unpack (headerLBS (proxy# @IndexCompact)) + let header = LBS.unpack headerLBS let primary = LBS.unpack $ LBS.fromChunks (map Chunk.toByteString chunks) let rest = LBS.unpack (finalLBS (NumEntries 7) index) @@ -233,7 +231,7 @@ prop_roundtrip_chunks (Chunks chunks index) numEntries = counterexample ("rest:\n" <> showBS bsRest) $ Right (numEntries, index) === fromSBS sbs where - bsVersion = headerLBS (proxy# @IndexCompact) + bsVersion = headerLBS bsPrimary = LBS.fromChunks $ map (Chunk.toByteString . 
word64VectorToChunk) chunks bsRest = finalLBS numEntries index @@ -291,11 +289,11 @@ prop_total_deserialisation_whitebox numEntries numPages word32s = writeIndexCompact :: SerialiseKey k => NumEntries -> ChunkSize -> LogicalPageSummaries k -> (LBS.ByteString, LBS.ByteString, LBS.ByteString) writeIndexCompact numEntries (ChunkSize csize) ps = runST $ do - ica <- IndexCompact.new csize - cs <- mapM (`append` ica) (toAppends ps) + ica <- new csize + cs <- mapM (`appendToCompact` ica) (toAppends ps) (c, index) <- unsafeEnd ica return - ( headerLBS (proxy# @IndexCompact) + ( headerLBS , LBS.fromChunks $ foldMap (map Chunk.toByteString) $ cs <> pure (toList c) , finalLBS numEntries index @@ -308,7 +306,7 @@ fromPageSummaries (ChunkSize csize) ps = fromList :: Int -> [Append] -> IndexCompact fromList maxcsize apps = runST $ do ica <- new maxcsize - mapM_ (`append` ica) apps + mapM_ (`appendToCompact` ica) apps (_, index) <- unsafeEnd ica pure index @@ -327,7 +325,7 @@ labelIndex ic = . QC.tabulate "Length of contiguous clash runs" (fmap (showPowersOf10 . snd) nscontig) . QC.tabulate "Contiguous clashes contain multi-page values" (fmap (show . fst) nscontig) . 
QC.classify (multiPageValuesClash ic) "Has clashing multi-page values" - where nclashes = IndexCompact.countClashes ic + where nclashes = countClashes ic nscontig = countContiguousClashes ic multiPageValuesClash :: IndexCompact -> Bool diff --git a/test/Test/Database/LSMTree/Internal/Index/Ordinary.hs b/test/Test/Database/LSMTree/Internal/Index/Ordinary.hs index 8200d2a29..a1f28f6dd 100644 --- a/test/Test/Database/LSMTree/Internal/Index/Ordinary.hs +++ b/test/Test/Database/LSMTree/Internal/Index/Ordinary.hs @@ -26,7 +26,8 @@ import Data.Word (Word16, Word32, Word64, Word8) import Database.LSMTree.Extras.Generators (LogicalPageSummaries, toAppends) import Database.LSMTree.Extras.Index - (Append (AppendMultiPage, AppendSinglePage), append) + (Append (AppendMultiPage, AppendSinglePage), + appendToOrdinary) import qualified Database.LSMTree.Internal.Chunk as Chunk (toByteVector) import Database.LSMTree.Internal.Entry (NumEntries (NumEntries)) import Database.LSMTree.Internal.Index.Ordinary @@ -258,7 +259,7 @@ lastKeysBlockFromAppends appends = lastKeysBlock where incrementalConstruction :: [Append] -> (IndexOrdinary, Primitive.Vector Word8) incrementalConstruction appends = runST $ do acc <- new initialKeyBufferSize minChunkSize - commonChunks <- concat <$> mapM (flip append acc) appends + commonChunks <- concat <$> mapM (flip appendToOrdinary acc) appends (remnant, unserialised) <- unsafeEnd acc let @@ -362,8 +363,7 @@ prop_numberOfEntriesFromSerialisedIndexWorks entryCount lastKeys where errorMsgOrEntryCount :: Either String NumEntries - errorMsgOrEntryCount - = fst <$> fromSBS @IndexOrdinary (serialisedIndex entryCount lastKeys) + errorMsgOrEntryCount = fst <$> fromSBS (serialisedIndex entryCount lastKeys) noErrorMsgButCorrectEntryCount :: Either String NumEntries noErrorMsgButCorrectEntryCount = Right entryCount @@ -381,7 +381,7 @@ prop_indexFromSerialisedIndexWorks entryCount lastKeys prop_tooShortInputMakesDeserialisationFail :: TooShortByteString -> Bool 
prop_tooShortInputMakesDeserialisationFail - = isLeft . fromSBS @IndexOrdinary . fromTooShortByteString + = isLeft . fromSBS . fromTooShortByteString prop_typeAndVersionErrorMakesDeserialisationFail :: Word32 -> [SerialisedKey] @@ -408,7 +408,7 @@ prop_partialKeySizeBlockMakesDeserialisationFail lastKeys partialKeySizeByte entryCount = isLeft $ - fromSBS @IndexOrdinary $ + fromSBS $ potentialSerialisedIndex testedTypeAndVersionBlock (lastKeysBlocks lastKeys ++ [Primitive.singleton partialKeySizeByte]) @@ -424,7 +424,7 @@ prop_partialKeyBlockMakesDeserialisationFail lastKeys partialKeyBlock entryCount = fromIntegral statedSize > Primitive.length partialKeyBlock ==> - isLeft (fromSBS @IndexOrdinary input) + isLeft (fromSBS input) where statedSizeBlock :: Primitive.Vector Word8 diff --git a/test/Test/Database/LSMTree/Internal/Lookup.hs b/test/Test/Database/LSMTree/Internal/Lookup.hs index a5fe9ca3a..430ae4187 100644 --- a/test/Test/Database/LSMTree/Internal/Lookup.hs +++ b/test/Test/Database/LSMTree/Internal/Lookup.hs @@ -47,7 +47,9 @@ import Database.LSMTree.Extras.RunData (RunData (..), unsafeFlushAsWriteBuffer) import Database.LSMTree.Internal.BlobRef import Database.LSMTree.Internal.Entry as Entry -import Database.LSMTree.Internal.Index.Compact +import Database.LSMTree.Internal.Index (Index, IndexType) +import qualified Database.LSMTree.Internal.Index as Index (IndexType (Compact), + search) import Database.LSMTree.Internal.Lookup import Database.LSMTree.Internal.Page (PageNo (PageNo), PageSpan (..)) import Database.LSMTree.Internal.Paths (RunFsPaths (..)) @@ -162,7 +164,7 @@ prop_indexSearchesModel dats = model rkixs = V.fromList $ indexSearchesModel (fmap thrd3 runs) lookupss $ rkixs indexSearchesModel :: - [IndexCompact] + [Index] -> [SerialisedKey] -> [(RunIx, KeyIx)] -> [PageSpan] @@ -170,7 +172,7 @@ indexSearchesModel cs ks rkixs = flip fmap rkixs $ \(rix, kix) -> let c = cs List.!! rix k = ks List.!! 
kix - in search k c + in Index.search k c prop_prepLookupsModel :: SmallList (InMemLookupData SerialisedKey SerialisedValue BlobSpan) @@ -193,7 +195,7 @@ prop_prepLookupsModel dats = real === model model = prepLookupsModel (fmap (\x -> (snd3 x, thrd3 x)) runs) lookupss prepLookupsModel :: - [(Bloom SerialisedKey, IndexCompact)] + [(Bloom SerialisedKey, Index)] -> [SerialisedKey] -> ([(RunIx, KeyIx)], [PageSpan]) prepLookupsModel rs ks = unzip @@ -201,7 +203,7 @@ prepLookupsModel rs ks = unzip | (rix, (b, c)) <- zip [0..] rs , (kix, k) <- zip [0..] ks , Bloom.elem k b - , let pspan = search k c + , let pspan = Index.search k c ] {------------------------------------------------------------------------------- @@ -298,7 +300,7 @@ prop_roundtripFromWriteBufferLookupIO :: prop_roundtripFromWriteBufferLookupIO (SmallList dats) = ioProperty $ withTempIOHasBlockIO "prop_roundtripFromWriteBufferLookupIO" $ \hfs hbio -> - withRuns hfs hbio dats $ \wb wbblobs runs -> do + withRuns hfs hbio Index.Compact dats $ \wb wbblobs runs -> do let model :: Map SerialisedKey (Entry SerialisedValue SerialisedBlob) model = Map.unionsWith (Entry.combine resolveV) (map runData dats) keys = V.fromList [ k | InMemLookupData{lookups} <- dats @@ -333,19 +335,20 @@ prop_roundtripFromWriteBufferLookupIO (SmallList dats) = -- withRuns :: FS.HasFS IO h -> FS.HasBlockIO IO h + -> IndexType -> [InMemLookupData SerialisedKey SerialisedValue SerialisedBlob] -> ( WB.WriteBuffer -> Ref (WBB.WriteBufferBlobs IO h) -> V.Vector (Ref (Run.Run IO h)) -> IO a) -> IO a -withRuns hfs _ [] action = +withRuns hfs _ _ [] action = bracket (WBB.new hfs (FS.mkFsPath ["wbblobs"])) releaseRef (\wbblobs -> action WB.empty wbblobs V.empty) -withRuns hfs hbio (wbdat:rundats) action = +withRuns hfs hbio indexType (wbdat:rundats) action = bracket (do wbblobs <- WBB.new hfs (FS.mkFsPath ["wbblobs"]) wbkops <- traverse (traverse (WBB.addBlob hfs wbblobs)) @@ -354,7 +357,7 @@ withRuns hfs hbio (wbdat:rundats) action = runs <- 
V.fromList <$> sequence - [ unsafeFlushAsWriteBuffer hfs hbio fsPaths (RunData runData) + [ unsafeFlushAsWriteBuffer hfs hbio indexType fsPaths (RunData runData) | (i, InMemLookupData{runData}) <- zip [1..] rundats , let fsPaths = RunFsPaths (FS.mkFsPath []) (RunNumber i) ] @@ -419,11 +422,11 @@ thrd3 (_, _, c) = c runWithHandle :: TestRun -> ( Handle (Map Int (Either RawPage RawOverflowPage)) - , Bloom SerialisedKey, IndexCompact + , Bloom SerialisedKey, Index ) runWithHandle (rawPages, b, ic) = (Handle rawPages (mkFsPath ["do not use"]), b, ic) -type TestRun = (Map Int (Either RawPage RawOverflowPage), Bloom SerialisedKey, IndexCompact) +type TestRun = (Map Int (Either RawPage RawOverflowPage), Bloom SerialisedKey, Index) mkTestRun :: Map SerialisedKey (Entry SerialisedValue BlobSpan) -> TestRun mkTestRun dat = (rawPages, b, ic) @@ -432,7 +435,7 @@ mkTestRun dat = (rawPages, b, ic) -- one-shot run construction (pages, b, ic) = runST $ do - racc <- Run.new nentries (RunAllocFixed 10) + racc <- Run.new nentries (RunAllocFixed 10) Index.Compact let kops = Map.toList dat psopss <- traverse (uncurry (Run.addKeyOp racc)) kops (mp, _ , b', ic', _) <- Run.unsafeFinalise racc diff --git a/test/Test/Database/LSMTree/Internal/Merge.hs b/test/Test/Database/LSMTree/Internal/Merge.hs index 1507490da..1321a9429 100644 --- a/test/Test/Database/LSMTree/Internal/Merge.hs +++ b/test/Test/Database/LSMTree/Internal/Merge.hs @@ -14,6 +14,7 @@ import Database.LSMTree.Extras.Generators (KeyForIndexCompact) import Database.LSMTree.Extras.RunData import qualified Database.LSMTree.Internal.BlobFile as BlobFile import qualified Database.LSMTree.Internal.Entry as Entry +import qualified Database.LSMTree.Internal.Index as Index (IndexType (Compact)) import Database.LSMTree.Internal.Merge (MergeType (..)) import qualified Database.LSMTree.Internal.Merge as Merge import Database.LSMTree.Internal.PageAcc (entryWouldFitInPage) @@ -69,11 +70,11 @@ prop_MergeDistributes :: SmallList (RunData 
KeyForIndexCompact SerialisedValue SerialisedBlob) -> IO Property prop_MergeDistributes fs hbio mergeType stepSize (SmallList rds) = - withRuns fs hbio (V.fromList (zip (simplePaths [10..]) rds')) $ \runs -> do + withRuns fs hbio Index.Compact (V.fromList (zip (simplePaths [10..]) rds')) $ \runs -> do let stepsNeeded = sum (map (Map.size . unRunData) rds) (stepsDone, lhs) <- mergeRuns fs hbio mergeType (RunNumber 0) runs stepSize let runData = RunData $ mergeWriteBuffers mergeType $ fmap unRunData rds' - withRun fs hbio (simplePath 1) runData $ \rhs -> do + withRun fs hbio Index.Compact (simplePath 1) runData $ \rhs -> do (lhsSize, lhsFilter, lhsIndex, lhsKOps, lhsKOpsFileContent, lhsBlobFileContent) <- getRunContent lhs @@ -141,7 +142,7 @@ prop_AbortMerge :: SmallList (RunData KeyForIndexCompact SerialisedValue SerialisedBlob) -> IO Property prop_AbortMerge fs hbio mergeType (Positive stepSize) (SmallList wbs) = - withRuns fs hbio (V.fromList (zip (simplePaths [10..]) wbs')) $ \runs -> do + withRuns fs hbio Index.Compact (V.fromList (zip (simplePaths [10..]) wbs')) $ \runs -> do let path0 = simplePath 0 mergeToClose <- makeInProgressMerge path0 runs traverse_ Merge.abort mergeToClose @@ -155,7 +156,7 @@ prop_AbortMerge fs hbio mergeType (Positive stepSize) (SmallList wbs) = wbs' = fmap serialiseRunData wbs makeInProgressMerge path runs = - Merge.new fs hbio Run.CacheRunData (RunAllocFixed 10) + Merge.new fs hbio Run.CacheRunData (RunAllocFixed 10) Index.Compact mergeType mappendValues path runs >>= \case Nothing -> return Nothing -- not in progress Just merge -> do @@ -182,10 +183,10 @@ mergeRuns :: StepSize -> IO (Int, Ref (Run.Run IO h)) mergeRuns fs hbio mergeType runNumber runs (Positive stepSize) = do - Merge.new fs hbio Run.CacheRunData (RunAllocFixed 10) + Merge.new fs hbio Run.CacheRunData (RunAllocFixed 10) Index.Compact mergeType mappendValues (RunFsPaths (FS.mkFsPath []) runNumber) runs >>= \case - Nothing -> (,) 0 <$> unsafeFlushAsWriteBuffer fs hbio + 
Nothing -> (,) 0 <$> unsafeFlushAsWriteBuffer fs hbio Index.Compact (RunFsPaths (FS.mkFsPath []) runNumber) (RunData Map.empty) Just m -> Merge.stepsToCompletionCounted m stepSize diff --git a/test/Test/Database/LSMTree/Internal/Run.hs b/test/Test/Database/LSMTree/Internal/Run.hs index d7739f064..b9b591a3b 100644 --- a/test/Test/Database/LSMTree/Internal/Run.hs +++ b/test/Test/Database/LSMTree/Internal/Run.hs @@ -19,6 +19,7 @@ import Database.LSMTree.Extras.RunData import Database.LSMTree.Internal.BlobRef (BlobSpan (..)) import qualified Database.LSMTree.Internal.CRC32C as CRC import Database.LSMTree.Internal.Entry +import qualified Database.LSMTree.Internal.Index as Index (IndexType (Compact)) import Database.LSMTree.Internal.Paths (RunFsPaths (..), WriteBufferFsPaths (..)) import qualified Database.LSMTree.Internal.Paths as Paths @@ -96,7 +97,7 @@ testSingleInsert sessionRoot key val mblob = -- flush write buffer let e = case mblob of Nothing -> Insert val; Just blob -> InsertWithBlob val blob wb = Map.singleton key e - withRun fs hbio (simplePath 42) (RunData wb) $ \_ -> do + withRun fs hbio Index.Compact (simplePath 42) (RunData wb) $ \_ -> do -- check all files have been written let activeDir = sessionRoot bsKOps <- BS.readFile (activeDir "42.keyops") @@ -178,7 +179,7 @@ prop_WriteNumEntries :: -> RunData KeyForIndexCompact SerialisedValue SerialisedBlob -> IO Property prop_WriteNumEntries fs hbio wb@(RunData m) = - withRun fs hbio (simplePath 42) wb' $ \run -> do + withRun fs hbio Index.Compact (simplePath 42) wb' $ \run -> do let !runSize = Run.size run return . 
labelRunData wb' $ @@ -196,12 +197,12 @@ prop_WriteAndOpen :: -> RunData KeyForIndexCompact SerialisedValue SerialisedBlob -> IO Property prop_WriteAndOpen fs hbio wb = - withRun fs hbio (simplePath 1337) (serialiseRunData wb) $ \written -> + withRun fs hbio Index.Compact (simplePath 1337) (serialiseRunData wb) $ \written -> withActionRegistry $ \reg -> do let paths = Run.runFsPaths written paths' = paths { runNumber = RunNumber 17} hardLinkRunFiles reg fs hbio paths paths' - loaded <- openFromDisk fs hbio CacheRunData (simplePath 17) + loaded <- openFromDisk fs hbio CacheRunData Index.Compact (simplePath 17) Run.size written @=? Run.size loaded withRef written $ \written' -> @@ -257,7 +258,7 @@ prop_WriteRunEqWriteWriteBuffer hfs hbio rd = do let rdPaths = simplePath 1337 let rdKOpsFile = Paths.runKOpsPath rdPaths let rdBlobFile = Paths.runBlobPath rdPaths - withRun hfs hbio rdPaths srd $ \_run -> do + withRun hfs hbio Index.Compact rdPaths srd $ \_run -> do -- Serialise run data as write buffer: let f (SerialisedValue x) (SerialisedValue y) = SerialisedValue (x <> y) let inPaths = WrapRunFsPaths $ simplePath 1111 diff --git a/test/Test/Database/LSMTree/Internal/RunAcc.hs b/test/Test/Database/LSMTree/Internal/RunAcc.hs index ea8e9aa38..9748bff7e 100644 --- a/test/Test/Database/LSMTree/Internal/RunAcc.hs +++ b/test/Test/Database/LSMTree/Internal/RunAcc.hs @@ -15,7 +15,8 @@ import Data.Maybe import qualified Data.Vector.Primitive as VP import Database.LSMTree.Internal.BlobRef (BlobSpan (..)) import Database.LSMTree.Internal.Entry -import qualified Database.LSMTree.Internal.Index as Index +import qualified Database.LSMTree.Internal.Index as Index (IndexType (Compact), + search) import Database.LSMTree.Internal.Page (PageNo (PageNo), singlePage) import qualified Database.LSMTree.Internal.PageAcc as PageAcc import qualified Database.LSMTree.Internal.PageAcc1 as PageAcc @@ -56,7 +57,7 @@ test_singleKeyRun = do !e = InsertWithBlob (SerialisedValue' (VP.fromList [48, 
19])) (BlobSpan 55 77) (addRes, (mp, mc, b, ic, _numEntries)) <- stToIO $ do - racc <- new (NumEntries 1) (RunAllocFixed 10) + racc <- new (NumEntries 1) (RunAllocFixed 10) Index.Compact addRes <- addKeyOp racc k e (addRes,) <$> unsafeFinalise racc diff --git a/test/Test/Database/LSMTree/Internal/RunBuilder.hs b/test/Test/Database/LSMTree/Internal/RunBuilder.hs index ce6a2693e..ed3fac40d 100644 --- a/test/Test/Database/LSMTree/Internal/RunBuilder.hs +++ b/test/Test/Database/LSMTree/Internal/RunBuilder.hs @@ -5,6 +5,7 @@ module Test.Database.LSMTree.Internal.RunBuilder (tests) where import Control.Monad.Class.MonadThrow import Data.Foldable (traverse_) import Database.LSMTree.Internal.Entry (NumEntries (..)) +import qualified Database.LSMTree.Internal.Index as Index (IndexType (Compact)) import Database.LSMTree.Internal.Paths (RunFsPaths (..)) import Database.LSMTree.Internal.RunAcc (RunBloomFilterAlloc (..)) import qualified Database.LSMTree.Internal.RunBuilder as RunBuilder @@ -47,7 +48,7 @@ prop_newInExistingDir hfs hbio = do let runDir = FS.mkFsPath ["a", "b", "c"] FS.createDirectoryIfMissing hfs True runDir bracket - (try (RunBuilder.new hfs hbio (RunFsPaths runDir (RunNumber 17)) (NumEntries 0) (RunAllocFixed 10))) + (try (RunBuilder.new hfs hbio (RunFsPaths runDir (RunNumber 17)) (NumEntries 0) (RunAllocFixed 10) Index.Compact)) (traverse_ RunBuilder.close) $ pure . \case Left e@FS.FsError{} -> counterexample ("expected a success, but got: " <> show e) $ property False @@ -58,7 +59,7 @@ prop_newInNonExistingDir :: HasFS IO h -> FS.HasBlockIO IO h -> IO Property prop_newInNonExistingDir hfs hbio = do let runDir = FS.mkFsPath ["a", "b", "c"] bracket - (try (RunBuilder.new hfs hbio (RunFsPaths runDir (RunNumber 17)) (NumEntries 0) (RunAllocFixed 10))) + (try (RunBuilder.new hfs hbio (RunFsPaths runDir (RunNumber 17)) (NumEntries 0) (RunAllocFixed 10) Index.Compact)) (traverse_ RunBuilder.close) $ pure . 
\case Left FS.FsError{} -> property True Right _ -> @@ -72,10 +73,10 @@ prop_newTwice :: HasFS IO h -> FS.HasBlockIO IO h -> IO Property prop_newTwice hfs hbio = do let runDir = FS.mkFsPath [] bracket - (RunBuilder.new hfs hbio (RunFsPaths runDir (RunNumber 17)) (NumEntries 0) (RunAllocFixed 10)) + (RunBuilder.new hfs hbio (RunFsPaths runDir (RunNumber 17)) (NumEntries 0) (RunAllocFixed 10) Index.Compact) RunBuilder.close $ \_ -> bracket - (try (RunBuilder.new hfs hbio (RunFsPaths runDir (RunNumber 17)) (NumEntries 0) (RunAllocFixed 10))) + (try (RunBuilder.new hfs hbio (RunFsPaths runDir (RunNumber 17)) (NumEntries 0) (RunAllocFixed 10) Index.Compact)) (traverse_ RunBuilder.close) $ pure . \case Left FS.FsError{} -> property True Right _ -> diff --git a/test/Test/Database/LSMTree/Internal/RunReader.hs b/test/Test/Database/LSMTree/Internal/RunReader.hs index 89aa419ab..ec9d18c2d 100644 --- a/test/Test/Database/LSMTree/Internal/RunReader.hs +++ b/test/Test/Database/LSMTree/Internal/RunReader.hs @@ -12,6 +12,7 @@ import Database.LSMTree.Extras.Generators (KeyForIndexCompact (..)) import Database.LSMTree.Extras.RunData import Database.LSMTree.Internal.BlobRef import Database.LSMTree.Internal.Entry (Entry) +import qualified Database.LSMTree.Internal.Index as Index (IndexType (Compact)) import Database.LSMTree.Internal.Run (Run) import qualified Database.LSMTree.Internal.RunReader as Reader import Database.LSMTree.Internal.Serialise @@ -78,7 +79,7 @@ prop_readAtOffset :: -> Maybe KeyForIndexCompact -> IO Property prop_readAtOffset fs hbio rd offsetKey = - withRun fs hbio (simplePath 42) rd' $ \run -> do + withRun fs hbio Index.Compact (simplePath 42) rd' $ \run -> do rhs <- readKOps (coerce offsetKey) run return . 
labelRunData rd' $ @@ -122,7 +123,7 @@ prop_readAtOffsetIdempotence :: -> Maybe KeyForIndexCompact -> IO Property prop_readAtOffsetIdempotence fs hbio rd offsetKey = - withRun fs hbio (simplePath 42) rd' $ \run -> do + withRun fs hbio Index.Compact (simplePath 42) rd' $ \run -> do lhs <- readKOps (coerce offsetKey) run rhs <- readKOps (coerce offsetKey) run @@ -146,7 +147,7 @@ prop_readAtOffsetReadHead :: -> RunData KeyForIndexCompact SerialisedValue SerialisedBlob -> IO Property prop_readAtOffsetReadHead fs hbio rd = - withRun fs hbio (simplePath 42) rd' $ \run -> do + withRun fs hbio Index.Compact (simplePath 42) rd' $ \run -> do lhs <- readKOps Nothing run rhs <- case lhs of [] -> return [] diff --git a/test/Test/Database/LSMTree/Internal/RunReaders.hs b/test/Test/Database/LSMTree/Internal/RunReaders.hs index 0fe724111..5b8721658 100644 --- a/test/Test/Database/LSMTree/Internal/RunReaders.hs +++ b/test/Test/Database/LSMTree/Internal/RunReaders.hs @@ -20,6 +20,7 @@ import Database.LSMTree.Extras.Generators (KeyForIndexCompact (..)) import Database.LSMTree.Extras.RunData import Database.LSMTree.Internal.BlobRef import Database.LSMTree.Internal.Entry +import qualified Database.LSMTree.Internal.Index as Index (IndexType (Compact)) import qualified Database.LSMTree.Internal.Paths as Paths import qualified Database.LSMTree.Internal.Run as Run import Database.LSMTree.Internal.RunNumber @@ -335,7 +336,7 @@ runIO act lu = case act of wbs' = fmap serialiseRunData wbs runs <- zipWithM - (\p -> liftIO . unsafeFlushAsWriteBuffer hfs hbio p) + (\p -> liftIO . unsafeFlushAsWriteBuffer hfs hbio Index.Compact p) (Paths.RunFsPaths (FS.mkFsPath []) . RunNumber <$> [numRuns ..]) wbs' newReaders <- liftIO $ do